mirror of https://github.com/apache/lucene.git
LUCENE-5339: add OrdinalsReader + Cache to abstract the source of the ords
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1542773 13f79535-47bb-0310-9956-ffa450edef68
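This change introduces OrdinalsReader as an abstraction over where a document's facet ordinals come from: DocValuesOrdinalsReader decodes them from a BinaryDocValues field, CachedOrdinalsReader wraps any other OrdinalsReader and caches the decoded ordinals per segment, and TaxonomyFacetCounts now counts through that abstraction while FastTaxonomyFacetCounts keeps the inlined default-encoding path. A rough usage sketch, mirroring the test changes below (searcher, taxoReader and config are assumed to exist and are not part of this commit):

  SimpleFacetsCollector fc = new SimpleFacetsCollector();
  searcher.search(new MatchAllDocsQuery(), fc);

  // Fast path: decodes the default BinaryDocValues encoding inline:
  Facets counts = new FastTaxonomyFacetCounts(taxoReader, config, fc);

  // Same result through the new abstraction, optionally caching decoded ordinals:
  OrdinalsReader ords = new CachedOrdinalsReader(new DocValuesOrdinalsReader());
  Facets cachedCounts = new TaxonomyFacetCounts(ords, taxoReader, config, fc);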
This commit is contained in:
parent 39f6c8a5ff
commit 7a0ee102d0
TODO
@@ -2,10 +2,8 @@ nocommit this!
TODO
- associations
- ords cache
- wrap an IW instead of extending one?
- re-enable ALL_BUT_DIM somehow?
- abstraction for 'ords source/decode'
- simplify ddq api
- SSDVValueSourceFacets?
- we could put more stuff into the "schema", e.g. this field is
@@ -15,7 +13,6 @@ TODO
- rename CategoryPath -> FacetLabel
- how to do avg() agg?
- test needsScores=true / valuesource associations
- drill sideways
- make FieldTypes optional (if all your dims are flat)?
- add hierarchy to ssdv facets?
- sparse faceting: allow skipping of certain dims?
@@ -0,0 +1,130 @@
package org.apache.lucene.facet.simple;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Map;
import java.util.WeakHashMap;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/**
 * A per-segment cache of documents' facet ordinals. Every
 * {@link CachedOrds} holds the ordinals in a raw {@code
 * int[]}, and therefore consumes as much RAM as the total
 * number of ordinals found in the segment, but saves the
 * CPU cost of decoding ordinals during facet counting.
 *
 * <p>
 * <b>NOTE:</b> every {@link CachedOrds} is limited to 2.1B
 * total ordinals. If that is a limitation for you then
 * consider limiting the segment size to fewer documents, or
 * use an alternative cache which pages through the category
 * ordinals.
 *
 * <p>
 * <b>NOTE:</b> when using this cache, it is advised to use
 * a {@link DocValuesFormat} that does not cache the data in
 * memory, at least for the category lists fields, or
 * otherwise you'll be doing double-caching.
 */
public class CachedOrdinalsReader extends OrdinalsReader {

  private final OrdinalsReader source;
  private CachedOrds current;

  // outer map is a WeakHashMap which uses reader.getCoreCacheKey() as the weak
  // reference. When it's no longer referenced, the entire inner map can be
  // evicted.
  private final Map<Object,CachedOrds> ordsCache = new WeakHashMap<Object,CachedOrds>();

  public CachedOrdinalsReader(OrdinalsReader source) {
    this.source = source;
  }

  private synchronized CachedOrds getCachedOrds(AtomicReaderContext context) throws IOException {
    Object cacheKey = context.reader().getCoreCacheKey();
    CachedOrds ords = ordsCache.get(cacheKey);
    if (ords == null) {
      ords = new CachedOrds(source.getReader(context), context.reader().maxDoc());
      ordsCache.put(cacheKey, ords);
    }

    return ords;
  }

  @Override
  public OrdinalsSegmentReader getReader(AtomicReaderContext context) throws IOException {
    final CachedOrds cachedOrds = getCachedOrds(context);
    return new OrdinalsSegmentReader() {
      @Override
      public void get(int docID, IntsRef ordinals) {
        ordinals.ints = cachedOrds.ordinals;
        ordinals.offset = cachedOrds.offsets[docID];
        ordinals.length = cachedOrds.offsets[docID+1] - ordinals.offset;
      }
    };
  }

  /** Holds the cached ordinals in two parallel {@code int[]} arrays. */
  public static final class CachedOrds {

    public final int[] offsets;
    public final int[] ordinals;

    /**
     * Creates a new {@link CachedOrds} from the {@link OrdinalsSegmentReader}.
     * Assumes that the reader is not {@code null}.
     */
    public CachedOrds(OrdinalsSegmentReader source, int maxDoc) throws IOException {
      final BytesRef buf = new BytesRef();

      offsets = new int[maxDoc + 1];
      int[] ords = new int[maxDoc]; // let's assume one ordinal per-document as an initial size

      // this aggregator is limited to Integer.MAX_VALUE total ordinals.
      long totOrds = 0;
      final IntsRef values = new IntsRef(32);
      for (int docID = 0; docID < maxDoc; docID++) {
        offsets[docID] = (int) totOrds;
        source.get(docID, values);
        long nextLength = totOrds + values.length;
        if (nextLength > ords.length) {
          if (nextLength > ArrayUtil.MAX_ARRAY_LENGTH) {
            throw new IllegalStateException("too many ordinals (>= " + nextLength + ") to cache");
          }
          ords = ArrayUtil.grow(ords, (int) nextLength);
        }
        System.arraycopy(values.ints, 0, ords, (int) totOrds, values.length);
        totOrds = nextLength;
      }
      offsets[maxDoc] = (int) totOrds;

      // if ords array is bigger by more than 10% of what we really need, shrink it
      if ((double) totOrds / ords.length < 0.9) {
        this.ordinals = new int[(int) totOrds];
        System.arraycopy(ords, 0, this.ordinals, 0, (int) totOrds);
      } else {
        this.ordinals = ords;
      }
    }
  }
}
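For reference, CachedOrds stores a CSR-style pair of parallel arrays: ordinals holds every document's ords back to back, and offsets[doc]..offsets[doc+1] delimits each document's slice, which is what the anonymous OrdinalsSegmentReader above returns without copying. A hypothetical three-document segment, purely to illustrate the layout:

  // doc 0 -> ords {5, 17}, doc 1 -> no ords, doc 2 -> ord {9}
  // offsets  = [0, 2, 2, 3]
  // ordinals = [5, 17, 9]
  // get(1, result) yields offset=2, length=0 over the shared ordinals array.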
@@ -0,0 +1,92 @@
package org.apache.lucene.facet.simple;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/** Decodes ordinals previously indexed into a BinaryDocValues field */

public class DocValuesOrdinalsReader extends OrdinalsReader {
  private final String field;

  public DocValuesOrdinalsReader() {
    this(FacetsConfig.DEFAULT_INDEXED_FIELD_NAME);
  }

  public DocValuesOrdinalsReader(String field) {
    this.field = field;
  }

  @Override
  public OrdinalsSegmentReader getReader(AtomicReaderContext context) throws IOException {
    BinaryDocValues values0 = context.reader().getBinaryDocValues(field);
    if (values0 == null) {
      values0 = BinaryDocValues.EMPTY;
    }

    final BinaryDocValues values = values0;

    return new OrdinalsSegmentReader() {
      private final BytesRef bytes = new BytesRef(32);

      @Override
      public void get(int docID, IntsRef ordinals) throws IOException {
        values.get(docID, bytes);
        decode(bytes, ordinals);
      }
    };
  }

  /** Subclass & override if you change the encoding. */
  protected void decode(BytesRef buf, IntsRef ordinals) {

    // grow the buffer up front, even if by a large number of values (buf.length)
    // that saves the need to check inside the loop for every decoded value if
    // the buffer needs to grow.
    if (ordinals.ints.length < buf.length) {
      ordinals.ints = ArrayUtil.grow(ordinals.ints, buf.length);
    }

    ordinals.offset = 0;
    ordinals.length = 0;

    // it is better if the decoding is inlined like so, and not e.g.
    // in a utility method
    int upto = buf.offset + buf.length;
    int value = 0;
    int offset = buf.offset;
    int prev = 0;
    while (offset < upto) {
      byte b = buf.bytes[offset++];
      if (b >= 0) {
        ordinals.ints[ordinals.length] = ((value << 7) | b) + prev;
        value = 0;
        prev = ordinals.ints[ordinals.length];
        ordinals.length++;
      } else {
        value = (value << 7) | (b & 0x7F);
      }
    }
  }
}
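decode() above reads delta-coded ordinals written as variable-length values: the 7-bit groups of each delta appear most-significant first, the high bit is set on every byte except the last one of a value, and each decoded delta is added to the previously decoded ordinal. A minimal encoder sketch that produces bytes this decode() accepts; it is hypothetical and not part of the commit (the sorted input and the helper's name are assumptions):

  // Encodes sorted ordinals into the delta/vInt-style form decode() expects.
  static BytesRef encode(int[] sortedOrds, int count) {
    byte[] bytes = new byte[count * 5]; // worst case: 5 bytes per value
    int upto = 0;
    int prev = 0;
    for (int i = 0; i < count; i++) {
      int delta = sortedOrds[i] - prev; // decode() adds the previous ordinal back
      prev = sortedOrds[i];
      // emit 7-bit groups, most significant first; the high bit marks "more bytes follow"
      if (delta >= (1 << 28)) {
        bytes[upto++] = (byte) (0x80 | ((delta >>> 28) & 0x7F));
      }
      if (delta >= (1 << 21)) {
        bytes[upto++] = (byte) (0x80 | ((delta >>> 21) & 0x7F));
      }
      if (delta >= (1 << 14)) {
        bytes[upto++] = (byte) (0x80 | ((delta >>> 14) & 0x7F));
      }
      if (delta >= (1 << 7)) {
        bytes[upto++] = (byte) (0x80 | ((delta >>> 7) & 0x7F));
      }
      bytes[upto++] = (byte) (delta & 0x7F); // final byte of this value: high bit clear
    }
    return new BytesRef(bytes, 0, upto);
  }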
@@ -0,0 +1,228 @@
package org.apache.lucene.facet.simple;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;

import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;

// nocommit jdoc that this assumes/requires the default encoding
public class FastTaxonomyFacetCounts extends Facets {
  private final FacetsConfig facetsConfig;
  private final TaxonomyReader taxoReader;
  private final int[] counts;
  private final String facetsFieldName;
  private final int[] children;
  private final int[] parents;
  private final int[] siblings;

  public FastTaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig facetsConfig, SimpleFacetsCollector fc) throws IOException {
    this(FacetsConfig.DEFAULT_INDEXED_FIELD_NAME, taxoReader, facetsConfig, fc);
  }

  public FastTaxonomyFacetCounts(String facetsFieldName, TaxonomyReader taxoReader, FacetsConfig facetsConfig, SimpleFacetsCollector fc) throws IOException {
    this.taxoReader = taxoReader;
    this.facetsFieldName = facetsFieldName;
    this.facetsConfig = facetsConfig;
    ParallelTaxonomyArrays pta = taxoReader.getParallelTaxonomyArrays();
    children = pta.children();
    parents = pta.parents();
    siblings = pta.siblings();
    counts = new int[taxoReader.getSize()];
    count(fc.getMatchingDocs());
  }

  private final void count(List<MatchingDocs> matchingDocs) throws IOException {
    //System.out.println("count matchingDocs=" + matchingDocs + " facetsField=" + facetsFieldName);
    for(MatchingDocs hits : matchingDocs) {
      BinaryDocValues dv = hits.context.reader().getBinaryDocValues(facetsFieldName);
      if (dv == null) { // this reader does not have DocValues for the requested category list
        continue;
      }
      FixedBitSet bits = hits.bits;

      final int length = hits.bits.length();
      int doc = 0;
      BytesRef scratch = new BytesRef();
      //System.out.println("count seg=" + hits.context.reader());
      while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
        //System.out.println(" doc=" + doc);
        dv.get(doc, scratch);
        byte[] bytes = scratch.bytes;
        int end = scratch.offset + scratch.length;
        int ord = 0;
        int offset = scratch.offset;
        int prev = 0;
        while (offset < end) {
          byte b = bytes[offset++];
          if (b >= 0) {
            prev = ord = ((ord << 7) | b) + prev;
            assert ord < counts.length: "ord=" + ord + " vs maxOrd=" + counts.length;
            ++counts[ord];
            ord = 0;
          } else {
            ord = (ord << 7) | (b & 0x7F);
          }
        }
        ++doc;
      }
    }

    // nocommit we could do this lazily instead:

    // Rollup any necessary dims:
    for(Map.Entry<String,FacetsConfig.DimConfig> ent : facetsConfig.getDimConfigs().entrySet()) {
      String dim = ent.getKey();
      FacetsConfig.DimConfig ft = ent.getValue();
      if (ft.hierarchical && ft.multiValued == false) {
        int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
        // It can be -1 if this field was declared in the
        // facetsConfig but never indexed:
        if (dimRootOrd > 0) {
          counts[dimRootOrd] += rollup(children[dimRootOrd]);
        }
      }
    }
  }

  private int rollup(int ord) {
    int sum = 0;
    while (ord != TaxonomyReader.INVALID_ORDINAL) {
      int childValue = counts[ord] + rollup(children[ord]);
      counts[ord] = childValue;
      sum += childValue;
      ord = siblings[ord];
    }
    return sum;
  }

  /** Return the count for a specific path. Returns -1 if
   *  this path doesn't exist, else the count. */
  @Override
  public Number getSpecificValue(String dim, String... path) throws IOException {
    int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
    if (ord < 0) {
      return -1;
    }
    return counts[ord];
  }

  @Override
  public SimpleFacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
    FacetLabel cp = FacetLabel.create(dim, path);
    int ord = taxoReader.getOrdinal(cp);
    if (ord == -1) {
      //System.out.println("no ord for path=" + path);
      return null;
    }
    return getTopChildren(cp, ord, topN);
  }

  private SimpleFacetResult getTopChildren(FacetLabel path, int dimOrd, int topN) throws IOException {

    TopOrdCountQueue q = new TopOrdCountQueue(topN);

    int bottomCount = 0;

    int ord = children[dimOrd];
    int totCount = 0;

    TopOrdCountQueue.OrdAndCount reuse = null;
    while(ord != TaxonomyReader.INVALID_ORDINAL) {
      if (counts[ord] > 0) {
        totCount += counts[ord];
        if (counts[ord] > bottomCount) {
          if (reuse == null) {
            reuse = new TopOrdCountQueue.OrdAndCount();
          }
          reuse.ord = ord;
          reuse.count = counts[ord];
          reuse = q.insertWithOverflow(reuse);
          if (q.size() == topN) {
            bottomCount = q.top().count;
          }
        }
      }

      ord = siblings[ord];
    }

    if (totCount == 0) {
      //System.out.println("totCount=0 for path=" + path);
      return null;
    }

    FacetsConfig.DimConfig ft = facetsConfig.getDimConfig(path.components[0]);
    // nocommit shouldn't we verify the indexedFieldName
    // matches what was passed to our ctor?
    if (ft.hierarchical && ft.multiValued) {
      totCount = counts[dimOrd];
    }

    LabelAndValue[] labelValues = new LabelAndValue[q.size()];
    for(int i=labelValues.length-1;i>=0;i--) {
      TopOrdCountQueue.OrdAndCount ordAndCount = q.pop();
      FacetLabel child = taxoReader.getPath(ordAndCount.ord);
      labelValues[i] = new LabelAndValue(child.components[path.length], ordAndCount.count);
    }

    return new SimpleFacetResult(path, totCount, labelValues);
  }

  @Override
  public List<SimpleFacetResult> getAllDims(int topN) throws IOException {
    int ord = children[TaxonomyReader.ROOT_ORDINAL];
    List<SimpleFacetResult> results = new ArrayList<SimpleFacetResult>();
    while (ord != TaxonomyReader.INVALID_ORDINAL) {
      SimpleFacetResult result = getTopChildren(taxoReader.getPath(ord), ord, topN);
      if (result != null) {
        results.add(result);
      }
      ord = siblings[ord];
    }

    // Sort by highest count:
    Collections.sort(results,
                     new Comparator<SimpleFacetResult>() {
                       @Override
                       public int compare(SimpleFacetResult a, SimpleFacetResult b) {
                         if (a.value.intValue() > b.value.intValue()) {
                           return -1;
                         } else if (b.value.intValue() > a.value.intValue()) {
                           return 1;
                         } else {
                           // Tie break by dimension
                           return a.path.components[0].compareTo(b.path.components[0]);
                         }
                       }
                     });

    return results;
  }
}
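The rollup() and getTopChildren() loops above walk the taxonomy through the ParallelTaxonomyArrays, assuming the usual contract that children[ord] points at one child of ord, siblings[ord] at the next sibling, and TaxonomyReader.INVALID_ORDINAL terminates each chain. A stripped-down sketch of that walk:

  // Enumerate the immediate children of a dimension's root ordinal:
  int child = children[dimOrd];
  while (child != TaxonomyReader.INVALID_ORDINAL) {
    // counts[child] holds this child's count (plus rolled-up descendants, if any)
    child = siblings[child];
  }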
@@ -0,0 +1,37 @@
package org.apache.lucene.facet.simple;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;

/** Provides per-document ordinals. */

public abstract class OrdinalsReader {

  public static abstract class OrdinalsSegmentReader {
    /** Get the ordinals for this document. ordinals.offset
     *  must always be 0! */
    public abstract void get(int doc, IntsRef ordinals) throws IOException;
  }

  /** Returns an {@link OrdinalsSegmentReader} for the given segment. */
  public abstract OrdinalsSegmentReader getReader(AtomicReaderContext context) throws IOException;
}
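OrdinalsReader is the extension point this commit adds: any source that can hand back an int ordinal list per document can drive TaxonomyFacetCounts. A toy implementation, only to illustrate the contract (fill ordinals.ints and ordinals.length, keep ordinals.offset at 0); it is hypothetical and not part of the commit:

  // Gives every document the same single ordinal; illustrates the contract only.
  public class ConstOrdinalsReader extends OrdinalsReader {
    private final int ord;

    public ConstOrdinalsReader(int ord) {
      this.ord = ord;
    }

    @Override
    public OrdinalsSegmentReader getReader(AtomicReaderContext context) {
      return new OrdinalsSegmentReader() {
        @Override
        public void get(int doc, IntsRef ordinals) {
          if (ordinals.ints.length < 1) {
            ordinals.ints = new int[1];
          }
          ordinals.ints[0] = ord;
          ordinals.offset = 0;
          ordinals.length = 1;
        }
      };
    }
  }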
@@ -110,7 +110,7 @@ public class SimpleDrillSideways {
   *  impl. */
  protected Facets buildFacetsResult(SimpleFacetsCollector drillDowns, SimpleFacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {

    Facets drillDownFacets = new TaxonomyFacetCounts(taxoReader, facetsConfig, drillDowns);
    Facets drillDownFacets = new FastTaxonomyFacetCounts(taxoReader, facetsConfig, drillDowns);

    if (drillSideways == null) {
      return drillDownFacets;
@@ -118,7 +118,7 @@ public class SimpleDrillSideways {
    Map<String,Facets> drillSidewaysFacets = new HashMap<String,Facets>();
    for(int i=0;i<drillSideways.length;i++) {
      drillSidewaysFacets.put(drillSidewaysDims[i],
                              new TaxonomyFacetCounts(taxoReader, facetsConfig, drillSideways[i]));
                              new FastTaxonomyFacetCounts(taxoReader, facetsConfig, drillSideways[i]));
    }
    return new MultiFacets(drillSidewaysFacets, drillDownFacets);
  }
@@ -31,24 +31,24 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntsRef;

/** Reads from any {@link OrdinalsReader}; use {@link
 *  FastTaxonomyFacetCounts} if you are just using the
 *  default encoding from {@link BinaryDocValues}. */

// nocommit jdoc that this assumes/requires the default encoding
public class TaxonomyFacetCounts extends Facets {
  private final OrdinalsReader ordinalsReader;
  private final FacetsConfig facetsConfig;
  private final TaxonomyReader taxoReader;
  private final int[] counts;
  private final String facetsFieldName;
  private final int[] children;
  private final int[] parents;
  private final int[] siblings;

  public TaxonomyFacetCounts(TaxonomyReader taxoReader, FacetsConfig facetsConfig, SimpleFacetsCollector fc) throws IOException {
    this(FacetsConfig.DEFAULT_INDEXED_FIELD_NAME, taxoReader, facetsConfig, fc);
  }

  public TaxonomyFacetCounts(String facetsFieldName, TaxonomyReader taxoReader, FacetsConfig facetsConfig, SimpleFacetsCollector fc) throws IOException {
  public TaxonomyFacetCounts(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig facetsConfig, SimpleFacetsCollector fc) throws IOException {
    this.taxoReader = taxoReader;
    this.facetsFieldName = facetsFieldName;
    this.ordinalsReader = ordinalsReader;
    this.facetsConfig = facetsConfig;
    ParallelTaxonomyArrays pta = taxoReader.getParallelTaxonomyArrays();
    children = pta.children();
@@ -60,35 +60,17 @@ public class TaxonomyFacetCounts extends Facets {

  private final void count(List<MatchingDocs> matchingDocs) throws IOException {
    //System.out.println("count matchingDocs=" + matchingDocs + " facetsField=" + facetsFieldName);
    IntsRef scratch = new IntsRef();
    for(MatchingDocs hits : matchingDocs) {
      BinaryDocValues dv = hits.context.reader().getBinaryDocValues(facetsFieldName);
      if (dv == null) { // this reader does not have DocValues for the requested category list
        continue;
      }
      OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
      FixedBitSet bits = hits.bits;

      final int length = hits.bits.length();
      int doc = 0;
      BytesRef scratch = new BytesRef();
      //System.out.println("count seg=" + hits.context.reader());
      while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
        //System.out.println(" doc=" + doc);
        dv.get(doc, scratch);
        byte[] bytes = scratch.bytes;
        int end = scratch.offset + scratch.length;
        int ord = 0;
        int offset = scratch.offset;
        int prev = 0;
        while (offset < end) {
          byte b = bytes[offset++];
          if (b >= 0) {
            prev = ord = ((ord << 7) | b) + prev;
            assert ord < counts.length: "ord=" + ord + " vs maxOrd=" + counts.length;
            ++counts[ord];
            ord = 0;
          } else {
            ord = (ord << 7) | (b & 0x7F);
          }
        ords.get(doc, scratch);
        for(int i=0;i<scratch.length;i++) {
          ++counts[scratch.ints[i]];
        }
        ++doc;
      }
@@ -35,6 +35,7 @@ import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntsRef;

/** Aggregates sum of values from a {@link ValueSource}, for
 *  each facet label. */
@@ -45,18 +46,20 @@ public class TaxonomyFacetSumValueSource extends Facets {
  private final FacetsConfig facetsConfig;
  private final TaxonomyReader taxoReader;
  private final float[] values;
  private final String facetsFieldName;
  private final int[] children;
  private final int[] parents;
  private final int[] siblings;
  private final OrdinalsReader ordinalsReader;

  public TaxonomyFacetSumValueSource(TaxonomyReader taxoReader, FacetsConfig facetsConfig, SimpleFacetsCollector fc, ValueSource valueSource) throws IOException {
    this(FacetsConfig.DEFAULT_INDEXED_FIELD_NAME, taxoReader, facetsConfig, fc, valueSource);
  public TaxonomyFacetSumValueSource(TaxonomyReader taxoReader, FacetsConfig facetsConfig,
                                     SimpleFacetsCollector fc, ValueSource valueSource) throws IOException {
    this(new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEXED_FIELD_NAME), taxoReader, facetsConfig, fc, valueSource);
  }

  public TaxonomyFacetSumValueSource(String facetsFieldName, TaxonomyReader taxoReader, FacetsConfig facetsConfig, SimpleFacetsCollector fc, ValueSource valueSource) throws IOException {
  public TaxonomyFacetSumValueSource(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader,
                                     FacetsConfig facetsConfig, SimpleFacetsCollector fc, ValueSource valueSource) throws IOException {
    this.taxoReader = taxoReader;
    this.facetsFieldName = facetsFieldName;
    this.ordinalsReader = ordinalsReader;
    this.facetsConfig = facetsConfig;
    ParallelTaxonomyArrays pta = taxoReader.getParallelTaxonomyArrays();
    children = pta.children();
@@ -82,43 +85,26 @@ public class TaxonomyFacetSumValueSource extends Facets {
    final FakeScorer scorer = new FakeScorer();
    Map<String, Scorer> context = new HashMap<String, Scorer>();
    context.put("scorer", scorer);
    IntsRef scratch = new IntsRef();
    for(MatchingDocs hits : matchingDocs) {
      BinaryDocValues dv = hits.context.reader().getBinaryDocValues(facetsFieldName);
      if (dv == null) { // this reader does not have DocValues for the requested category list
        continue;
      }
      OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
      FixedBitSet bits = hits.bits;

      final int length = hits.bits.length();
      int doc = 0;
      int scoresIdx = 0;
      BytesRef scratch = new BytesRef();
      float[] scores = hits.scores;

      FunctionValues functionValues = valueSource.getValues(context, hits.context);
      while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
        dv.get(doc, scratch);
        ords.get(doc, scratch);
        if (keepScores) {
          scorer.docID = doc;
          scorer.score = scores[scoresIdx++];
        }
        byte[] bytes = scratch.bytes;
        int end = scratch.offset + scratch.length;
        int ord = 0;
        int offset = scratch.offset;
        int prev = 0;

        float value = (float) functionValues.doubleVal(doc);

        while (offset < end) {
          byte b = bytes[offset++];
          if (b >= 0) {
            prev = ord = ((ord << 7) | b) + prev;
            values[ord] += value;
            ord = 0;
          } else {
            ord = (ord << 7) | (b & 0x7F);
          }
        for(int i=0;i<scratch.length;i++) {
          values[scratch.ints[i]] += value;
        }
        ++doc;
      }
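With the constructor change, value-source aggregation goes through the same OrdinalsReader abstraction. A short example (the "price" field, the LongFieldSource choice and the surrounding setup are assumptions, not part of this commit):

  // Sum a per-document value into each facet label, reusing a cached ords source:
  OrdinalsReader ords = new CachedOrdinalsReader(new DocValuesOrdinalsReader());
  Facets sums = new TaxonomyFacetSumValueSource(ords, taxoReader, config, fc,
                                                new LongFieldSource("price"));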
@@ -107,7 +107,7 @@ public class TestTaxonomyFacets extends FacetTestCase {
    // wrap collecting the "normal" hits and also facets:
    searcher.search(new MatchAllDocsQuery(), c);

    TaxonomyFacetCounts facets = new TaxonomyFacetCounts(taxoReader, fts, c);
    Facets facets = new FastTaxonomyFacetCounts(taxoReader, fts, c);

    // Retrieve & verify results:
    assertEquals("Publish Date (5)\n 2010 (2)\n 2012 (2)\n 1999 (1)\n", facets.getTopChildren(10, "Publish Date").toString());
@@ -118,7 +118,7 @@ public class TestTaxonomyFacets extends FacetTestCase {
    q2.add(new FacetLabel("Publish Date", "2010"));
    c = new SimpleFacetsCollector();
    searcher.search(q2, c);
    facets = new TaxonomyFacetCounts(taxoReader, fts, c);
    facets = new FastTaxonomyFacetCounts(taxoReader, fts, c);
    assertEquals("Author (2)\n Bob (1)\n Lisa (1)\n", facets.getTopChildren(10, "Author").toString());

    assertEquals(1, facets.getSpecificValue("Author", "Lisa"));
@@ -185,7 +185,16 @@ public class TestTaxonomyFacets extends FacetTestCase {
    SimpleFacetsCollector c = new SimpleFacetsCollector();
    searcher.search(new MatchAllDocsQuery(), c);

    TaxonomyFacetCounts facets = new TaxonomyFacetCounts(taxoReader, new FacetsConfig(), c);
    Facets facets;
    if (random().nextBoolean()) {
      facets = new FastTaxonomyFacetCounts(taxoReader, new FacetsConfig(), c);
    } else {
      OrdinalsReader ordsReader = new DocValuesOrdinalsReader();
      if (random().nextBoolean()) {
        ordsReader = new CachedOrdinalsReader(ordsReader);
      }
      facets = new TaxonomyFacetCounts(ordsReader, taxoReader, new FacetsConfig(), c);
    }

    // Ask for top 10 labels for any dims that have counts:
    List<SimpleFacetResult> results = facets.getAllDims(10);