Merge trunk.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1439991 13f79535-47bb-0310-9956-ffa450edef68
Author: Adrien Grand
Date:   2013-01-29 16:44:16 +00:00
Commit: 577364e414
73 changed files with 1544 additions and 592 deletions

View File

@ -79,6 +79,9 @@ New Features
near-real-time reader is opened that contains those changes.
(Robert Muir, Mike McCandless)
* LUCENE-4723: Add AnalyzerFactoryTask to benchmark, and enable analyzer
creation via the resulting factories using NewAnalyzerTask. (Steve Rowe)
API Changes
* LUCENE-4709: FacetResultNode no longer has a residue field. (Shai Erera)
@ -104,6 +107,14 @@ Bug Fixes
degrees and barely any height, it would generate so many indexed terms
(> 500k) that it could even cause an OutOfMemoryError. Fixed. (David Smiley)
* LUCENE-4704: Make join queries override hashcode and equals methods.
(Martijn van Groningen)
* LUCENE-4724: Fix bug in CategoryPath which allowed passing null or empty
string components. This is forbidden now (throws an exception). Note that if
you have a taxonomy index created with such strings, you should rebuild it.
(Michael McCandless, Shai Erera)
======================= Lucene 4.1.0 =======================
Changes in backwards compatibility policy

View File

@ -19,25 +19,43 @@ doc.body.tokenized=true
docs.dir=reuters-out
log.step=1000
-AnalyzerFactory(name:shingle-bigrams-unigrams,
StandardTokenizer,
ShingleFilter(maxShingleSize:2, outputUnigrams:true))
-AnalyzerFactory(name:shingle-bigrams,
StandardTokenizer,
ShingleFilter(maxShingleSize:2, outputUnigrams:false))
-AnalyzerFactory(name:shingle-4grams-unigrams,
StandardTokenizer,
ShingleFilter(maxShingleSize:4, outputUnigrams:true))
-AnalyzerFactory(name:shingle-4grams,
StandardTokenizer,
ShingleFilter(maxShingleSize:4, outputUnigrams:false))
-AnalyzerFactory(name:standard-tokenizer-only, StandardTokenizer)
{ "Rounds"
-NewShingleAnalyzer(maxShingleSize:2,outputUnigrams:true)
-NewAnalyzer(shingle-bigrams-unigrams)
-ResetInputs
{ "BigramsAndUnigrams" { ReadTokens > : 10000 }
-NewShingleAnalyzer(maxShingleSize:2,outputUnigrams:false)
-NewAnalyzer(shingle-bigrams)
-ResetInputs
{ "BigramsOnly" { ReadTokens > : 10000 }
-NewShingleAnalyzer(maxShingleSize:4,outputUnigrams:true)
-NewAnalyzer(shingle-4grams-unigrams)
-ResetInputs
{ "FourgramsAndUnigrams" { ReadTokens > : 10000 }
-NewShingleAnalyzer(maxShingleSize:4,outputUnigrams:false)
-NewAnalyzer(shingle-4grams)
-ResetInputs
{ "FourgramsOnly" { ReadTokens > : 10000 }
-NewAnalyzer(standard.StandardAnalyzer)
-NewAnalyzer(standard-tokenizer-only)
-ResetInputs
{ "UnigramsOnly" { ReadTokens > : 10000 }

View File

@ -51,7 +51,7 @@ while (<>) {
# Print out platform info
print "JAVA:\n", `java -version 2>&1`, "\nOS:\n";
if ($^O =~ /win/i) {
if ($^O =~ /(?<!dar)win/i) {
print "$^O\n";
eval {
require Win32;

View File

@ -23,6 +23,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
@ -34,6 +35,7 @@ import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
@ -55,6 +57,7 @@ import org.apache.lucene.util.IOUtils;
* <li>Directory, Writer, Reader.
* <li>Taxonomy Directory, Writer, Reader.
* <li>DocMaker, FacetSource and a few instances of QueryMaker.
* <li>Named AnalysisFactories.
* <li>Analyzer.
* <li>Statistics data which updated during the run.
* </ul>
@ -78,6 +81,7 @@ public class PerfRunData implements Closeable {
// directory, analyzer, docMaker - created at startup.
// reader, writer, searcher - maintained by basic tasks.
private Directory directory;
private Map<String,AnalyzerFactory> analyzerFactories = new HashMap<String,AnalyzerFactory>();
private Analyzer analyzer;
private DocMaker docMaker;
private ContentSource contentSource;
@ -358,7 +362,7 @@ public class PerfRunData implements Closeable {
}
/**
* @return Returns the anlyzer.
* @return Returns the analyzer.
*/
public Analyzer getAnalyzer() {
return analyzer;
@ -434,4 +438,7 @@ public class PerfRunData implements Closeable {
return qm;
}
public Map<String,AnalyzerFactory> getAnalyzerFactories() {
return analyzerFactories;
}
}
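The new analyzerFactories map is the hand-off point between the two tasks: AnalyzerFactoryTask registers a factory under its name, and NewAnalyzerTask resolves that name before falling back to class-name instantiation. A minimal sketch of the round trip, assuming a runData and a built factory are already in hand:

// AnalyzerFactoryTask side: register the factory under its name
runData.getAnalyzerFactories().put("shingle-bigrams", factory);

// NewAnalyzerTask side: resolve the name and install the analyzer
AnalyzerFactory resolved = runData.getAnalyzerFactories().get("shingle-bigrams");
if (resolved != null) {
  runData.setAnalyzer(resolved.create());
}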

View File

@ -0,0 +1,459 @@
package org.apache.lucene.benchmark.byTask.tasks;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.FilesystemResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
/**
* Analyzer factory construction task. The name given to the constructed factory may
* be given to NewAnalyzerTask, which will call AnalyzerFactory.create().
*
* Params are in the form argname:argvalue or argname:"argvalue" or argname:'argvalue';
* use backslashes to escape '"' or "'" inside a quoted value when it's used as the enclosing
* quotation mark.
*
* Specify params in a comma-separated list of the following, in order:
* <ol>
* <li>Analyzer args:
* <ul>
* <li><b>Required</b>: <code>name:<i>analyzer-factory-name</i></code></li>
* <li>Optional: <tt>positionIncrementGap:<i>int value</i></tt> (default: 0)</li>
* <li>Optional: <tt>offsetGap:<i>int value</i></tt> (default: 1)</li>
* </ul>
* </li>
* <li>zero or more CharFilterFactory's, followed by</li>
* <li>exactly one TokenizerFactory, followed by</li>
* <li>zero or more TokenFilterFactory's</li>
* </ol>
*
* Each component analysis factory may specify <tt>luceneMatchVersion</tt> (defaults to
* {@link Version#LUCENE_CURRENT}) and any of the args understood by the specified
* *Factory class, in the above-described param format.
* <p/>
* Example:
* <pre>
* -AnalyzerFactory(name:'strip html, fold to ascii, whitespace tokenize, max 10k tokens',
* positionIncrementGap:100,
* HTMLStripCharFilter,
* MappingCharFilter(mapping:'mapping-FoldToASCII.txt'),
* WhitespaceTokenizer(luceneMatchVersion:LUCENE_42),
* TokenLimitFilter(maxTokenCount:10000, consumeAllTokens:false))
* [...]
* -NewAnalyzer('strip html, fold to ascii, whitespace tokenize, max 10k tokens')
* </pre>
* <p/>
* AnalyzerFactory will direct analysis component factories to look for resources
* under the directory specified in the "work.dir" property.
*/
public class AnalyzerFactoryTask extends PerfTask {
private static final String LUCENE_ANALYSIS_PACKAGE_PREFIX = "org.apache.lucene.analysis.";
private static final Pattern ANALYSIS_COMPONENT_SUFFIX_PATTERN
= Pattern.compile("(?s:(?:(?:Token|Char)?Filter|Tokenizer)(?:Factory)?)$");
private static final Pattern TRAILING_DOT_ZERO_PATTERN = Pattern.compile("\\.0$");
private enum ArgType {ANALYZER_ARG, ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER, TOKENFILTER }
String factoryName = null;
Integer positionIncrementGap = null;
Integer offsetGap = null;
private List<CharFilterFactory> charFilterFactories = new ArrayList<CharFilterFactory>();
private TokenizerFactory tokenizerFactory = null;
private List<TokenFilterFactory> tokenFilterFactories = new ArrayList<TokenFilterFactory>();
public AnalyzerFactoryTask(PerfRunData runData) {
super(runData);
}
@Override
public int doLogic() {
return 1;
}
/**
* Sets the params.
* Analysis component factory names may optionally include the "Factory" suffix.
*
* @param params analysis pipeline specification: name, (optional) positionIncrementGap,
* (optional) offsetGap, 0+ CharFilterFactory's, 1 TokenizerFactory,
* and 0+ TokenFilterFactory's
*/
@Override
public void setParams(String params) {
super.setParams(params);
ArgType expectedArgType = ArgType.ANALYZER_ARG;
final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
stok.commentChar('#');
stok.quoteChar('"');
stok.quoteChar('\'');
stok.eolIsSignificant(false);
stok.ordinaryChar('(');
stok.ordinaryChar(')');
stok.ordinaryChar(':');
stok.ordinaryChar(',');
try {
while (stok.nextToken() != StreamTokenizer.TT_EOF) {
switch (stok.ttype) {
case ',': {
// Do nothing
break;
}
case StreamTokenizer.TT_WORD: {
if (expectedArgType.equals(ArgType.ANALYZER_ARG)) {
final String argName = stok.sval;
if ( ! argName.equalsIgnoreCase("name")
&& ! argName.equalsIgnoreCase("positionIncrementGap")
&& ! argName.equalsIgnoreCase("offsetGap")) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Missing 'name' param to AnalyzerFactory: '" + params + "'");
}
stok.nextToken();
if (stok.ttype != ':') {
throw new RuntimeException
("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
}
stok.nextToken();
String argValue = stok.sval;
switch (stok.ttype) {
case StreamTokenizer.TT_NUMBER: {
argValue = Double.toString(stok.nval);
// Drop the ".0" from numbers, for integer arguments
argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
// Intentional fallthrough
}
case '"':
case '\'':
case StreamTokenizer.TT_WORD: {
if (argName.equalsIgnoreCase("name")) {
factoryName = argValue;
expectedArgType = ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER;
} else {
int intArgValue = 0;
try {
intArgValue = Integer.parseInt(argValue);
} catch (NumberFormatException e) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + argValue + "'", e);
}
if (argName.equalsIgnoreCase("positionIncrementGap")) {
positionIncrementGap = intArgValue;
} else if (argName.equalsIgnoreCase("offsetGap")) {
offsetGap = intArgValue;
}
}
break;
}
case StreamTokenizer.TT_EOF: {
throw new RuntimeException("Unexpected EOF: " + stok.toString());
}
default: {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
} else if (expectedArgType.equals(ArgType.ANALYZER_ARG_OR_CHARFILTER_OR_TOKENIZER)) {
final String argName = stok.sval;
if (argName.equalsIgnoreCase("positionIncrementGap")
|| argName.equalsIgnoreCase("offsetGap")) {
stok.nextToken();
if (stok.ttype != ':') {
throw new RuntimeException
("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to AnalyzerFactory");
}
stok.nextToken();
int intArgValue = (int)stok.nval;
switch (stok.ttype) {
case '"':
case '\'':
case StreamTokenizer.TT_WORD: {
intArgValue = 0;
try {
intArgValue = Integer.parseInt(stok.sval.trim());
} catch (NumberFormatException e) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Exception parsing " + argName + " value '" + stok.sval + "'", e);
}
// Intentional fall-through
}
case StreamTokenizer.TT_NUMBER: {
if (argName.equalsIgnoreCase("positionIncrementGap")) {
positionIncrementGap = intArgValue;
} else if (argName.equalsIgnoreCase("offsetGap")) {
offsetGap = intArgValue;
}
break;
}
case StreamTokenizer.TT_EOF: {
throw new RuntimeException("Unexpected EOF: " + stok.toString());
}
default: {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
break;
}
try {
final Class<? extends CharFilterFactory> clazz;
clazz = lookupAnalysisClass(argName, CharFilterFactory.class);
createAnalysisPipelineComponent(stok, clazz);
} catch (IllegalArgumentException e) {
try {
final Class<? extends TokenizerFactory> clazz;
clazz = lookupAnalysisClass(argName, TokenizerFactory.class);
createAnalysisPipelineComponent(stok, clazz);
expectedArgType = ArgType.TOKENFILTER;
} catch (IllegalArgumentException e2) {
throw new RuntimeException("Line #" + lineno(stok) + ": Can't find class '"
+ argName + "' as CharFilterFactory or TokenizerFactory");
}
}
} else { // expectedArgType = ArgType.TOKENFILTER
final String className = stok.sval;
final Class<? extends TokenFilterFactory> clazz;
try {
clazz = lookupAnalysisClass(className, TokenFilterFactory.class);
} catch (IllegalArgumentException e) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Can't find class '" + className + "' as TokenFilterFactory");
}
createAnalysisPipelineComponent(stok, clazz);
}
break;
}
default: {
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
}
} catch (RuntimeException e) {
if (e.getMessage().startsWith("Line #")) {
throw e;
} else {
throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
}
} catch (Throwable t) {
throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
}
final AnalyzerFactory analyzerFactory = new AnalyzerFactory
(charFilterFactories, tokenizerFactory, tokenFilterFactories);
analyzerFactory.setPositionIncrementGap(positionIncrementGap);
analyzerFactory.setOffsetGap(offsetGap);
getRunData().getAnalyzerFactories().put(factoryName, analyzerFactory);
}
/**
* Instantiates the given analysis factory class after pulling params from
* the given stream tokenizer, then stores the result in the appropriate
* pipeline component list.
*
* @param stok stream tokenizer from which to draw analysis factory params
* @param clazz analysis factory class to instantiate
*/
private void createAnalysisPipelineComponent
(StreamTokenizer stok, Class<? extends AbstractAnalysisFactory> clazz) {
final AbstractAnalysisFactory instance;
try {
instance = clazz.newInstance();
} catch (Exception e) {
throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
}
Version luceneMatchVersion = null;
Map<String,String> argMap = new HashMap<String,String>();
boolean parenthetical = false;
try {
WHILE_LOOP: while (stok.nextToken() != StreamTokenizer.TT_EOF) {
switch (stok.ttype) {
case ',': {
if (parenthetical) {
// Do nothing
break;
} else {
// Finished reading this analysis factory configuration
break WHILE_LOOP;
}
}
case '(': {
if (parenthetical) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unexpected opening parenthesis.");
}
parenthetical = true;
break;
}
case ')': {
if (parenthetical) {
parenthetical = false;
} else {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unexpected closing parenthesis.");
}
break;
}
case StreamTokenizer.TT_WORD: {
if ( ! parenthetical) {
throw new RuntimeException("Line #" + lineno(stok) + ": Unexpected token '" + stok.sval + "'");
}
String argName = stok.sval;
stok.nextToken();
if (stok.ttype != ':') {
throw new RuntimeException
("Line #" + lineno(stok) + ": Missing ':' after '" + argName + "' param to " + clazz.getSimpleName());
}
stok.nextToken();
String argValue = stok.sval;
switch (stok.ttype) {
case StreamTokenizer.TT_NUMBER: {
argValue = Double.toString(stok.nval);
// Drop the ".0" from numbers, for integer arguments
argValue = TRAILING_DOT_ZERO_PATTERN.matcher(argValue).replaceFirst("");
// Intentional fall-through
}
case '"':
case '\'':
case StreamTokenizer.TT_WORD: {
if (argName.equalsIgnoreCase("luceneMatchVersion")) {
try {
luceneMatchVersion = Version.parseLeniently(argValue);
} catch (IllegalArgumentException e) {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unrecognized luceneMatchVersion '" + argValue + "'", e);
}
} else {
argMap.put(argName, argValue);
}
break;
}
case StreamTokenizer.TT_EOF: {
throw new RuntimeException("Unexpected EOF: " + stok.toString());
}
default: {
throw new RuntimeException
("Line #" + lineno(stok) + ": Unexpected token: " + stok.toString());
}
}
}
}
}
instance.setLuceneMatchVersion
(null == luceneMatchVersion ? Version.LUCENE_CURRENT : luceneMatchVersion);
instance.init(argMap);
if (instance instanceof ResourceLoaderAware) {
File baseDir = new File(getRunData().getConfig().get("work.dir", "work")).getAbsoluteFile();
((ResourceLoaderAware)instance).inform(new FilesystemResourceLoader(baseDir));
}
if (CharFilterFactory.class.isAssignableFrom(clazz)) {
charFilterFactories.add((CharFilterFactory)instance);
} else if (TokenizerFactory.class.isAssignableFrom(clazz)) {
tokenizerFactory = (TokenizerFactory)instance;
} else if (TokenFilterFactory.class.isAssignableFrom(clazz)) {
tokenFilterFactories.add((TokenFilterFactory)instance);
}
} catch (RuntimeException e) {
if (e.getMessage().startsWith("Line #")) {
throw (e);
} else {
throw new RuntimeException("Line #" + lineno(stok) + ": ", e);
}
} catch (Throwable t) {
throw new RuntimeException("Line #" + lineno(stok) + ": ", t);
}
}
/**
* This method looks up a class by its fully qualified name (FQN), by its simple
* class name, or by a package-suffixed name, assuming "org.apache.lucene.analysis."
* as the package prefix (e.g. "standard.ClassicTokenizerFactory" ->
* "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
*
* If className contains a period, the class is first looked up as-is, assuming that it
* is an FQN. If this fails, lookup is retried after prepending the Lucene analysis
* package prefix to the class name.
*
* If className does not contain a period, the analysis SPI *Factory.lookupClass()
* methods are used to find the class.
*
* @param className The name or the short name of the class.
* @param expectedType The superclass className is expected to extend
* @return the loaded class.
* @throws ClassNotFoundException if lookup fails
*/
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
throws ClassNotFoundException {
if (className.contains(".")) {
try {
// First, try className == FQN
return Class.forName(className).asSubclass(expectedType);
} catch (ClassNotFoundException e) {
try {
// Second, retry lookup after prepending the Lucene analysis package prefix
return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
} catch (ClassNotFoundException e1) {
throw new ClassNotFoundException("Can't find class '" + className
+ "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
}
}
}
// No dot - use analysis SPI lookup
final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
} else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
} else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
}
throw new ClassNotFoundException("Can't find class '" + className + "'");
}
/* (non-Javadoc)
* @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
*/
@Override
public boolean supportsParams() {
return true;
}
/** Returns the current line in the algorithm file */
public int lineno(StreamTokenizer stok) {
return getAlgLineNum() + stok.lineno();
}
}
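lookupAnalysisClass() accepts three spellings for the same component. Hypothetical calls, assuming analyzers-common on the classpath, a task instance in scope, and a context that declares throws ClassNotFoundException:

// 1. SPI short name: the suffix pattern strips "Tokenizer", then
//    TokenizerFactory.lookupClass("Whitespace") resolves the factory
task.lookupAnalysisClass("WhitespaceTokenizer", TokenizerFactory.class);

// 2. Package suffix: "org.apache.lucene.analysis." is prepended
task.lookupAnalysisClass("standard.ClassicTokenizerFactory", TokenizerFactory.class);

// 3. Fully qualified name: tried as-is first
task.lookupAnalysisClass("org.apache.lucene.analysis.shingle.ShingleFilterFactory",
                         TokenFilterFactory.class);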

View File

@ -16,10 +16,16 @@ package org.apache.lucene.benchmark.byTask.tasks;
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.utils.AnalyzerFactory;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.*;
import java.lang.reflect.Constructor;
@ -28,12 +34,12 @@ import java.lang.reflect.Constructor;
*
*/
public class NewAnalyzerTask extends PerfTask {
private List<String> analyzerClassNames;
private List<String> analyzerNames;
private int current;
public NewAnalyzerTask(PerfRunData runData) {
super(runData);
analyzerClassNames = new ArrayList<String>();
analyzerNames = new ArrayList<String>();
}
public static final Analyzer createAnalyzer(String className) throws Exception{
@ -50,55 +56,98 @@ public class NewAnalyzerTask extends PerfTask {
@Override
public int doLogic() throws IOException {
String className = null;
String analyzerName = null;
try {
if (current >= analyzerClassNames.size()) {
if (current >= analyzerNames.size()) {
current = 0;
}
className = analyzerClassNames.get(current++);
analyzerName = analyzerNames.get(current++);
Analyzer analyzer = null;
if (null == className || 0 == className.length()) {
className = "org.apache.lucene.analysis.standard.StandardAnalyzer";
if (null == analyzerName || 0 == analyzerName.length()) {
analyzerName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
}
if (-1 == className.indexOf(".")) {
try {
// If no package, first attempt to instantiate a core analyzer
String coreClassName = "org.apache.lucene.analysis.core." + className;
analyzer = createAnalyzer(coreClassName);
className = coreClassName;
} catch (ClassNotFoundException e) {
// If not a core analyzer, try the base analysis package
className = "org.apache.lucene.analysis." + className;
analyzer = createAnalyzer(className);
}
// First, lookup analyzerName as a named analyzer factory
AnalyzerFactory factory = getRunData().getAnalyzerFactories().get(analyzerName);
if (null != factory) {
analyzer = factory.create();
} else {
if (className.startsWith("standard.")) {
className = "org.apache.lucene.analysis." + className;
if (analyzerName.contains(".")) {
if (analyzerName.startsWith("standard.")) {
analyzerName = "org.apache.lucene.analysis." + analyzerName;
}
analyzer = createAnalyzer(analyzerName);
} else { // No package
try {
// Attempt to instantiate a core analyzer
String coreClassName = "org.apache.lucene.analysis.core." + analyzerName;
analyzer = createAnalyzer(coreClassName);
analyzerName = coreClassName;
} catch (ClassNotFoundException e) {
// If not a core analyzer, try the base analysis package
analyzerName = "org.apache.lucene.analysis." + analyzerName;
analyzer = createAnalyzer(analyzerName);
}
}
analyzer = createAnalyzer(className);
}
getRunData().setAnalyzer(analyzer);
System.out.println("Changed Analyzer to: " + className);
} catch (Exception e) {
throw new RuntimeException("Error creating Analyzer: " + className, e);
throw new RuntimeException("Error creating Analyzer: " + analyzerName, e);
}
return 1;
}
/**
* Set the params (analyzerClassName only), Comma-separate list of Analyzer class names. If the Analyzer lives in
* Set the params (analyzerName only): a comma-separated list of analyzer names. If the Analyzer lives in
* org.apache.lucene.analysis, the name can be shortened by dropping the o.a.l.a part of the Fully Qualified Class Name.
* <p/>
* Analyzer names may also refer to previously defined AnalyzerFactory's.
* <p/>
* Example Declaration: {"NewAnalyzer" NewAnalyzer(WhitespaceAnalyzer, SimpleAnalyzer, StopAnalyzer, standard.StandardAnalyzer) >
* <p/>
* Example AnalyzerFactory usage:
* <pre>
* -AnalyzerFactory(name:'whitespace tokenized',WhitespaceTokenizer)
* -NewAnalyzer('whitespace tokenized')
* </pre>
* @param params analyzerClassName, or empty for the StandardAnalyzer
*/
@Override
public void setParams(String params) {
super.setParams(params);
for (StringTokenizer tokenizer = new StringTokenizer(params, ","); tokenizer.hasMoreTokens();) {
String s = tokenizer.nextToken();
analyzerClassNames.add(s.trim());
final StreamTokenizer stok = new StreamTokenizer(new StringReader(params));
stok.quoteChar('"');
stok.quoteChar('\'');
stok.eolIsSignificant(false);
stok.ordinaryChar(',');
try {
while (stok.nextToken() != StreamTokenizer.TT_EOF) {
switch (stok.ttype) {
case ',': {
// Do nothing
break;
}
case '\'':
case '\"':
case StreamTokenizer.TT_WORD: {
analyzerNames.add(stok.sval);
break;
}
default: {
throw new RuntimeException("Unexpected token: " + stok.toString());
}
}
}
} catch (RuntimeException e) {
if (e.getMessage().startsWith("Line #")) {
throw e;
} else {
throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", e);
}
} catch (Throwable t) {
throw new RuntimeException("Line #" + (stok.lineno() + getAlgLineNum()) + ": ", t);
}
}
/* (non-Javadoc)

View File

@ -1,117 +0,0 @@
package org.apache.lucene.benchmark.byTask.tasks;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.StringTokenizer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.shingle.ShingleAnalyzerWrapper;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.benchmark.byTask.PerfRunData;
/**
* Task to support benchmarking ShingleFilter / ShingleAnalyzerWrapper
* <p>
* <ul>
* <li> <code>NewShingleAnalyzer</code> (constructs with all defaults)
* <li> <code>NewShingleAnalyzer(analyzer:o.a.l.analysis.StandardAnalyzer,maxShingleSize:2,outputUnigrams:true)</code>
* </ul>
* </p>
*/
public class NewShingleAnalyzerTask extends PerfTask {
private String analyzerClassName = "standard.StandardAnalyzer";
private int maxShingleSize = 2;
private boolean outputUnigrams = true;
public NewShingleAnalyzerTask(PerfRunData runData) {
super(runData);
}
private void setAnalyzer() throws Exception {
Analyzer wrappedAnalyzer = null;
if (null == analyzerClassName || 0 == analyzerClassName.length()) {
analyzerClassName = "org.apache.lucene.analysis.standard.StandardAnalyzer";
}
if (-1 == analyzerClassName.indexOf(".")) {
String coreClassName = "org.apache.lucene.analysis.core." + analyzerClassName;
try {
// If there is no package, first attempt to instantiate a core analyzer
wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(coreClassName);
analyzerClassName = coreClassName;
} catch (ClassNotFoundException e) {
// If this is not a core analyzer, try the base analysis package
analyzerClassName = "org.apache.lucene.analysis." + analyzerClassName;
wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(analyzerClassName);
}
} else {
if (analyzerClassName.startsWith("standard.")) {
analyzerClassName = "org.apache.lucene.analysis." + analyzerClassName;
}
wrappedAnalyzer = NewAnalyzerTask.createAnalyzer(analyzerClassName);
}
ShingleAnalyzerWrapper analyzer = new ShingleAnalyzerWrapper(
wrappedAnalyzer,
ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE,
maxShingleSize,
ShingleFilter.TOKEN_SEPARATOR,
outputUnigrams,
false);
getRunData().setAnalyzer(analyzer);
}
@Override
public int doLogic() throws Exception {
try {
setAnalyzer();
System.out.println
("Changed Analyzer to: ShingleAnalyzerWrapper, wrapping ShingleFilter over "
+ analyzerClassName);
} catch (Exception e) {
throw new RuntimeException("Error creating Analyzer", e);
}
return 1;
}
@Override
public void setParams(String params) {
super.setParams(params);
StringTokenizer st = new StringTokenizer(params, ",");
while (st.hasMoreTokens()) {
String param = st.nextToken();
StringTokenizer expr = new StringTokenizer(param, ":");
String key = expr.nextToken();
String value = expr.nextToken();
if (key.equalsIgnoreCase("analyzer")) {
analyzerClassName = value;
} else if (key.equalsIgnoreCase("outputUnigrams")) {
outputUnigrams = Boolean.parseBoolean(value);
} else if (key.equalsIgnoreCase("maxShingleSize")) {
maxShingleSize = (int)Double.parseDouble(value);
} else {
throw new RuntimeException("Unknown parameter " + param);
}
}
}
@Override
public boolean supportsParams() {
return true;
}
}

View File

@ -62,6 +62,9 @@ public abstract class PerfTask implements Cloneable {
private boolean runInBackground;
private int deltaPri;
// The first line of this task's definition in the alg file
private int algLineNum = 0;
protected static final String NEW_LINE = System.getProperty("line.separator");
/** Should not be used externally */
@ -317,4 +320,11 @@ public abstract class PerfTask implements Cloneable {
this.disableCounting = disableCounting;
}
public void setAlgLineNum(int algLineNum) {
this.algLineNum = algLineNum;
}
public int getAlgLineNum() {
return algLineNum;
}
}

View File

@ -58,11 +58,12 @@ public class Algorithm {
StreamTokenizer stok = new StreamTokenizer(new StringReader(algTxt));
stok.commentChar('#');
stok.eolIsSignificant(false);
stok.ordinaryChar('"');
stok.quoteChar('"');
stok.quoteChar('\'');
stok.ordinaryChar('/');
stok.ordinaryChar('(');
stok.ordinaryChar(')');
boolean colonOk = false;
boolean colonOk = false;
boolean isDisableCountNextTask = false; // only for primitive tasks
currSequence.setDepth(0);
@ -74,6 +75,7 @@ public class Algorithm {
Constructor<? extends PerfTask> cnstr = taskClass(config,s)
.asSubclass(PerfTask.class).getConstructor(PerfRunData.class);
PerfTask task = cnstr.newInstance(runData);
task.setAlgLineNum(stok.lineno());
task.setDisableCounting(isDisableCountNextTask);
isDisableCountNextTask = false;
currSequence.addTask(task);
@ -90,24 +92,54 @@ public class Algorithm {
if (stok.ttype!='(') {
stok.pushBack();
} else {
// get params, for tasks that supports them, - anything until next ')'
// get params, for tasks that support them - allow recursive parenthetical expressions
stok.eolIsSignificant(true); // Allow params tokenizer to keep track of line number
StringBuilder params = new StringBuilder();
stok.nextToken();
while (stok.ttype!=')') {
switch (stok.ttype) {
case StreamTokenizer.TT_NUMBER:
params.append(stok.nval);
break;
case StreamTokenizer.TT_WORD:
params.append(stok.sval);
break;
case StreamTokenizer.TT_EOF:
throw new Exception("unexpexted EOF: - "+stok.toString());
default:
params.append((char)stok.ttype);
if (stok.ttype != ')') {
int count = 1;
BALANCED_PARENS: while (true) {
switch (stok.ttype) {
case StreamTokenizer.TT_NUMBER: {
params.append(stok.nval);
break;
}
case StreamTokenizer.TT_WORD: {
params.append(stok.sval);
break;
}
case StreamTokenizer.TT_EOF: {
throw new RuntimeException("Unexpexted EOF: - "+stok.toString());
}
case '"':
case '\'': {
params.append((char)stok.ttype);
// re-escape delimiters, if any
params.append(stok.sval.replaceAll("" + (char)stok.ttype, "\\\\" + (char)stok.ttype));
params.append((char)stok.ttype);
break;
}
case '(': {
params.append((char)stok.ttype);
++count;
break;
}
case ')': {
if (--count >= 1) { // exclude final closing parenthesis
params.append((char)stok.ttype);
} else {
break BALANCED_PARENS;
}
break;
}
default: {
params.append((char)stok.ttype);
}
}
stok.nextToken();
}
stok.nextToken();
}
stok.eolIsSignificant(false);
String prm = params.toString().trim();
if (prm.length()>0) {
task.setParams(prm);
@ -182,10 +214,8 @@ public class Algorithm {
if (stok.ttype!='"') {
stok.pushBack();
} else {
stok.nextToken();
name = stok.sval;
stok.nextToken();
if (stok.ttype!='"' || name==null || name.length()==0) {
if (stok.ttype!='"' || name==null || name.length()==0) {
throw new Exception("sequence name problem - "+stok.toString());
}
}
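The practical effect of the balanced-paren scan: for a hypothetical alg line such as AnalyzerFactory(name:'bigrams', StandardTokenizer, ShingleFilter(maxShingleSize:2)), the old loop stopped at the first ')' and handed setParams() a string truncated after maxShingleSize:2, leaving a stray ')' in the token stream. The new loop counts nesting depth, excludes only the final closing parenthesis, and re-escapes quote characters inside quoted values, so the task receives the complete parameter list.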

View File

@ -0,0 +1,132 @@
package org.apache.lucene.benchmark.byTask.utils;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import java.io.Reader;
import java.util.List;
/**
* A factory to create an analyzer.
* See {@link org.apache.lucene.benchmark.byTask.tasks.AnalyzerFactoryTask}
*/
public final class AnalyzerFactory {
final private List<CharFilterFactory> charFilterFactories;
final private TokenizerFactory tokenizerFactory;
final private List<TokenFilterFactory> tokenFilterFactories;
private String name = null;
private Integer positionIncrementGap = null;
private Integer offsetGap = null;
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
TokenizerFactory tokenizerFactory,
List<TokenFilterFactory> tokenFilterFactories) {
this.charFilterFactories = charFilterFactories;
assert null != tokenizerFactory;
this.tokenizerFactory = tokenizerFactory;
this.tokenFilterFactories = tokenFilterFactories;
}
public void setName(String name) {
this.name = name;
}
public void setPositionIncrementGap(Integer positionIncrementGap) {
this.positionIncrementGap = positionIncrementGap;
}
public void setOffsetGap(Integer offsetGap) {
this.offsetGap = offsetGap;
}
public Analyzer create() {
return new Analyzer() {
private final Integer positionIncrementGap = AnalyzerFactory.this.positionIncrementGap;
private final Integer offsetGap = AnalyzerFactory.this.offsetGap;
@Override
public Reader initReader(String fieldName, Reader reader) {
if (charFilterFactories != null && charFilterFactories.size() > 0) {
Reader wrappedReader = reader;
for (CharFilterFactory charFilterFactory : charFilterFactories) {
wrappedReader = charFilterFactory.create(wrappedReader);
}
reader = wrappedReader;
}
return reader;
}
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName, Reader reader) {
final Tokenizer tokenizer = tokenizerFactory.create(reader);
TokenStream tokenStream = tokenizer;
for (TokenFilterFactory filterFactory : tokenFilterFactories) {
tokenStream = filterFactory.create(tokenStream);
}
return new TokenStreamComponents(tokenizer, tokenStream);
}
@Override
public int getPositionIncrementGap(String fieldName) {
return null == positionIncrementGap ? super.getPositionIncrementGap(fieldName) : positionIncrementGap;
}
@Override
public int getOffsetGap(String fieldName) {
return null == offsetGap ? super.getOffsetGap(fieldName) : offsetGap;
}
};
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder("AnalyzerFactory(");
if (null != name) {
sb.append("name:");
sb.append(name);
sb.append(", ");
}
if (null != positionIncrementGap) {
sb.append("positionIncrementGap:");
sb.append(positionIncrementGap);
sb.append(", ");
}
if (null != offsetGap) {
sb.append("offsetGap:");
sb.append(offsetGap);
sb.append(", ");
}
for (CharFilterFactory charFilterFactory: charFilterFactories) {
sb.append(charFilterFactory);
sb.append(", ");
}
sb.append(tokenizerFactory);
for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
sb.append(", ");
sb.append(tokenFilterFactory);
}
sb.append(')');
return sb.toString();
}
}
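A minimal standalone sketch of driving this factory directly, using the same newInstance / setLuceneMatchVersion / init lifecycle that AnalyzerFactoryTask follows (factory class and argument names from analyzers-common, assumed available):

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
import org.apache.lucene.analysis.shingle.ShingleFilterFactory;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.util.Version;

WhitespaceTokenizerFactory tok = new WhitespaceTokenizerFactory();
tok.setLuceneMatchVersion(Version.LUCENE_42);
tok.init(new HashMap<String,String>());

ShingleFilterFactory shingle = new ShingleFilterFactory();
shingle.setLuceneMatchVersion(Version.LUCENE_42);
Map<String,String> args = new HashMap<String,String>();
args.put("maxShingleSize", "2");
args.put("outputUnigrams", "false");
shingle.init(args);

AnalyzerFactory factory = new AnalyzerFactory(
    Collections.<CharFilterFactory>emptyList(),
    tok,
    Collections.singletonList((TokenFilterFactory) shingle));
factory.setPositionIncrementGap(100); // optional, mirrors the task's optional arg
Analyzer analyzer = factory.create();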

View File

@ -71,6 +71,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
public void setUp() throws Exception {
super.setUp();
copyToWorkDir("reuters.first20.lines.txt");
copyToWorkDir("test-mapping-ISOLatin1Accent-partial.txt");
}
/**
@ -1019,63 +1020,79 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
}
/**
* Test that we can create ShingleAnalyzerWrappers.
* Test that we can create shingle analyzers using AnalyzerFactory.
*/
public void testShingleAnalyzer() throws Exception {
String text = "one,two,three, four five six";
// Default analyzer, maxShingleSize, and outputUnigrams
Benchmark benchmark = execBenchmark(getShingleConfig(""));
// StandardTokenizer, maxShingleSize, and outputUnigrams
Benchmark benchmark = execBenchmark(getAnalyzerFactoryConfig
("shingle-analyzer", "StandardTokenizer,ShingleFilter"));
benchmark.getRunData().getAnalyzer().tokenStream
("bogus", new StringReader(text)).close();
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
new String[] {"one", "one two", "two", "two three",
"three", "three four", "four", "four five",
"five", "five six", "six"});
// Default analyzer, maxShingleSize = 3, and outputUnigrams = false
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one", "one two", "two", "two three",
"three", "three four", "four", "four five",
"five", "five six", "six" });
// StandardTokenizer, maxShingleSize = 3, and outputUnigrams = false
benchmark = execBenchmark
(getShingleConfig("maxShingleSize:3,outputUnigrams:false"));
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one two", "one two three", "two three",
"two three four", "three four",
"three four five", "four five",
"four five six", "five six" });
// WhitespaceAnalyzer, default maxShingleSize and outputUnigrams
(getAnalyzerFactoryConfig
("shingle-analyzer",
"StandardTokenizer,ShingleFilter(maxShingleSize:3,outputUnigrams:false)"));
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one two", "one two three", "two three",
"two three four", "three four",
"three four five", "four five",
"four five six", "five six" });
// WhitespaceTokenizer, default maxShingleSize and outputUnigrams
benchmark = execBenchmark
(getShingleConfig("analyzer:WhitespaceAnalyzer"));
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one,two,three,", "one,two,three, four",
"four", "four five", "five", "five six",
"six" });
(getAnalyzerFactoryConfig("shingle-analyzer", "WhitespaceTokenizer,ShingleFilter"));
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one,two,three,", "one,two,three, four",
"four", "four five", "five", "five six",
"six" });
// WhitespaceAnalyzer, maxShingleSize=3 and outputUnigrams=false
// WhitespaceTokenizer, maxShingleSize=3 and outputUnigrams=false
benchmark = execBenchmark
(getShingleConfig
("outputUnigrams:false,maxShingleSize:3,analyzer:WhitespaceAnalyzer"));
assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one,two,three, four",
"one,two,three, four five",
"four five", "four five six",
"five six" });
(getAnalyzerFactoryConfig
("shingle-factory",
"WhitespaceTokenizer,ShingleFilter(outputUnigrams:false,maxShingleSize:3)"));
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
new String[] { "one,two,three, four",
"one,two,three, four five",
"four five", "four five six",
"five six" });
}
private void assertEqualShingle
(Analyzer analyzer, String text, String[] expected) throws Exception {
BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected);
}
private String[] getShingleConfig(String params) {
private String[] getAnalyzerFactoryConfig(String name, String params) {
final String singleQuoteEscapedName = name.replaceAll("'", "\\\\'");
String algLines[] = {
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
"work.dir=" + getWorkDir().getAbsolutePath().replaceAll("\\\\", "/"), // Fix Windows path
"content.source.forever=false",
"directory=RAMDirectory",
"NewShingleAnalyzer(" + params + ")",
"AnalyzerFactory(name:'" + singleQuoteEscapedName + "', " + params + ")",
"NewAnalyzer('" + singleQuoteEscapedName + "')",
"CreateIndex",
"{ \"AddDocs\" AddDoc > : * "
};
return algLines;
}
public void testAnalyzerFactory() throws Exception {
String text = "Fortieth, Quarantième, Cuadragésimo";
Benchmark benchmark = execBenchmark(getAnalyzerFactoryConfig
("ascii folded, pattern replaced, standard tokenized, downcased, bigrammed.'analyzer'",
"positionIncrementGap:100,offsetGap:1111,"
+"MappingCharFilter(mapping:'test-mapping-ISOLatin1Accent-partial.txt'),"
+"PatternReplaceCharFilterFactory(pattern:'e(\\\\\\\\S*)m',replacement:\"$1xxx$1\"),"
+"StandardTokenizer,LowerCaseFilter,NGramTokenFilter(minGramSize:2,maxGramSize:2)"));
BaseTokenStreamTestCase.assertAnalyzesTo(benchmark.getRunData().getAnalyzer(), text,
new String[] { "fo", "or", "rt", "ti", "ie", "et", "th",
"qu", "ua", "ar", "ra", "an", "nt", "ti", "ix", "xx", "xx", "xe",
"cu", "ua", "ad", "dr", "ra", "ag", "gs", "si", "ix", "xx", "xx", "xs", "si", "io"});
}
private String getReuters20LinesFile() {
return getWorkDirResourcePath("reuters.first20.lines.txt");

View File

@ -0,0 +1,30 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Syntax:
# "source" => "target"
# "source".length() > 0 (source cannot be empty.)
# "target".length() >= 0 (target can be empty.)
# example:
# "À" => "A"
# "\u00C0" => "A"
# "\u00C0" => "\u0041"
# "ß" => "ss"
# "\t" => " "
# "\n" => ""
# è => e
"\u00E8" => "e"
# é => e
"\u00E9" => "e"

View File

@ -63,7 +63,7 @@ final class ForUtil {
}
final PackedInts.Decoder decoder = PackedInts.getDecoder(format, version, bpv);
final int iterations = computeIterations(decoder);
maxDataSize = Math.max(maxDataSize, iterations * decoder.valueCount());
maxDataSize = Math.max(maxDataSize, iterations * decoder.byteValueCount());
}
}
}
@ -75,7 +75,7 @@ final class ForUtil {
* values with the provided {@link Decoder}.
*/
private static int computeIterations(PackedInts.Decoder decoder) {
return (int) Math.ceil((float) BLOCK_SIZE / decoder.valueCount());
return (int) Math.ceil((float) BLOCK_SIZE / decoder.byteValueCount());
}
/**
@ -165,9 +165,9 @@ final class ForUtil {
assert numBits > 0 && numBits <= 32 : numBits;
final PackedInts.Encoder encoder = encoders[numBits];
final int iters = iterations[numBits];
assert iters * encoder.valueCount() >= BLOCK_SIZE;
assert iters * encoder.byteValueCount() >= BLOCK_SIZE;
final int encodedSize = encodedSizes[numBits];
assert (iters * encoder.blockCount()) << 3 >= encodedSize;
assert iters * encoder.byteBlockCount() >= encodedSize;
out.writeByte((byte) numBits);
@ -198,7 +198,7 @@ final class ForUtil {
final PackedInts.Decoder decoder = decoders[numBits];
final int iters = iterations[numBits];
assert iters * decoder.valueCount() >= BLOCK_SIZE;
assert iters * decoder.byteValueCount() >= BLOCK_SIZE;
decoder.decode(encoded, 0, decoded, 0, iters);
}

View File

@ -130,8 +130,8 @@ abstract class AbstractBlockPackedWriter {
protected final void writeValues(int bitsRequired) throws IOException {
final PackedInts.Encoder encoder = PackedInts.getEncoder(PackedInts.Format.PACKED, PackedInts.VERSION_CURRENT, bitsRequired);
final int iterations = values.length / encoder.valueCount();
final int blockSize = encoder.blockCount() * 8 * iterations;
final int iterations = values.length / encoder.byteValueCount();
final int blockSize = encoder.byteBlockCount() * iterations;
if (blocks == null || blocks.length < blockSize) {
blocks = new byte[blockSize];
}

View File

@ -212,8 +212,8 @@ public final class BlockPackedReaderIterator {
Arrays.fill(values, minValue);
} else {
final PackedInts.Decoder decoder = PackedInts.getDecoder(PackedInts.Format.PACKED, packedIntsVersion, bitsPerValue);
final int iterations = blockSize / decoder.valueCount();
final int blocksSize = iterations * 8 * decoder.blockCount();
final int iterations = blockSize / decoder.byteValueCount();
final int blocksSize = iterations * decoder.byteBlockCount();
if (blocks == null || blocks.length < blocksSize) {
blocks = new byte[blocksSize];
}

View File

@ -2,7 +2,6 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -153,35 +152,30 @@ abstract class BulkOperation implements PackedInts.Decoder, PackedInts.Encoder {
* For every number of bits per value, there is a minimum number of
* blocks (b) / values (v) you need to write in order to reach the next block
* boundary:
* - 16 bits per value -> b=1, v=4
* - 24 bits per value -> b=3, v=8
* - 50 bits per value -> b=25, v=32
* - 63 bits per value -> b=63, v=64
* - 16 bits per value -> b=2, v=1
* - 24 bits per value -> b=3, v=1
* - 50 bits per value -> b=25, v=4
* - 63 bits per value -> b=63, v=8
* - ...
* <p>
*
* A bulk read consists of copying <code>iterations*v</code> values that are
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
* (higher values of <code>iterations</code> are likely to yield a better
* throughput) => this requires n * (b + v) longs in memory.
* <p>
* throughput) => this requires n * (b + 8v) bytes of memory.
*
* This method computes <code>iterations</code> as
* <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
* <p>
* The resulting number of iterations of this method is guaranteed not to
* overflow when multiplied by
* <tt>8 * {@link PackedInts.Encoder#blockCount()}</tt> or
* <tt>8 * {@link PackedInts.Decoder#blockCount()}</tt>.
* <code>ramBudget / (b + 8v)</code> (since a long is 8 bytes).
*/
public final int computeIterations(int valueCount, int ramBudget) {
final int iterations = (ramBudget >>> 3) / (blockCount() + valueCount());
final int iterations = ramBudget / (byteBlockCount() + 8 * byteValueCount());
if (iterations == 0) {
// at least 1
return 1;
} else if ((iterations - 1) * blockCount() >= valueCount) {
} else if ((iterations - 1) * byteValueCount() >= valueCount) {
// don't allocate for more than the size of the reader
return (int) Math.ceil((double) valueCount / valueCount());
return (int) Math.ceil((double) valueCount / byteValueCount());
} else {
return iterations;
}
}
}
}
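A worked example of the revised budget arithmetic, using the 50-bits-per-value row from the comment above: b = byteBlockCount() = 25 and v = byteValueCount() = 4, so one iteration costs b + 8v = 25 + 32 = 57 bytes (the decoded values live in a long[]). With ramBudget = 16384 bytes, computeIterations() returns 16384 / 57 = 287 iterations, i.e. 287 * 4 = 1148 values per bulk read.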

View File

@ -1,5 +1,6 @@
package org.apache.lucene.util.packed;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -23,9 +24,12 @@ package org.apache.lucene.util.packed;
class BulkOperationPacked extends BulkOperation {
private final int bitsPerValue;
private final int blockCount;
private final int valueCount;
private final int longBlockCount;
private final int longValueCount;
private final int byteBlockCount;
private final int byteValueCount;
private final long mask;
private final int intMask;
public BulkOperationPacked(int bitsPerValue) {
this.bitsPerValue = bitsPerValue;
@ -34,31 +38,50 @@ class BulkOperationPacked extends BulkOperation {
while ((blocks & 1) == 0) {
blocks >>>= 1;
}
this.blockCount = blocks;
this.valueCount = 64 * blockCount / bitsPerValue;
this.longBlockCount = blocks;
this.longValueCount = 64 * longBlockCount / bitsPerValue;
int byteBlockCount = 8 * longBlockCount;
int byteValueCount = longValueCount;
while ((byteBlockCount & 1) == 0 && (byteValueCount & 1) == 0) {
byteBlockCount >>>= 1;
byteValueCount >>>= 1;
}
this.byteBlockCount = byteBlockCount;
this.byteValueCount = byteValueCount;
if (bitsPerValue == 64) {
this.mask = ~0L;
} else {
this.mask = (1L << bitsPerValue) - 1;
}
assert valueCount * bitsPerValue == 64 * blockCount;
this.intMask = (int) mask;
assert longValueCount * bitsPerValue == 64 * longBlockCount;
}
@Override
public int blockCount() {
return blockCount;
public int longBlockCount() {
return longBlockCount;
}
@Override
public int valueCount() {
return valueCount;
public int longValueCount() {
return longValueCount;
}
@Override
public int byteBlockCount() {
return byteBlockCount;
}
@Override
public int byteValueCount() {
return byteValueCount;
}
@Override
public void decode(long[] blocks, int blocksOffset, long[] values,
int valuesOffset, int iterations) {
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
for (int i = 0; i < longValueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft < 0) {
values[valuesOffset++] =
@ -74,22 +97,28 @@ class BulkOperationPacked extends BulkOperation {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values,
int valuesOffset, int iterations) {
int blockBitsLeft = 8;
int valueBitsLeft = bitsPerValue;
long nextValue = 0;
for (int end = valuesOffset + iterations * valueCount; valuesOffset < end; ) {
if (valueBitsLeft > blockBitsLeft) {
nextValue |= (blocks[blocksOffset++] & ((1L << blockBitsLeft) - 1)) << (valueBitsLeft - blockBitsLeft);
valueBitsLeft -= blockBitsLeft;
blockBitsLeft = 8;
long nextValue = 0L;
int bitsLeft = bitsPerValue;
for (int i = 0; i < iterations * byteBlockCount; ++i) {
final long bytes = blocks[blocksOffset++] & 0xFFL;
if (bitsLeft > 8) {
// just buffer
bitsLeft -= 8;
nextValue |= bytes << bitsLeft;
} else {
nextValue |= ((blocks[blocksOffset] & 0xFFL) >>> (blockBitsLeft - valueBitsLeft)) & ((1L << valueBitsLeft) - 1);
values[valuesOffset++] = nextValue;
nextValue = 0;
blockBitsLeft -= valueBitsLeft;
valueBitsLeft = bitsPerValue;
// flush
int bits = 8 - bitsLeft;
values[valuesOffset++] = nextValue | (bytes >>> bits);
while (bits >= bitsPerValue) {
bits -= bitsPerValue;
values[valuesOffset++] = (bytes >>> bits) & mask;
}
// then buffer
bitsLeft = bitsPerValue - bits;
nextValue = (bytes & ((1L << bits) - 1)) << bitsLeft;
}
}
assert bitsLeft == bitsPerValue;
}
@Override
@ -99,7 +128,7 @@ class BulkOperationPacked extends BulkOperation {
throw new UnsupportedOperationException("Cannot decode " + bitsPerValue + "-bits values into an int[]");
}
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
for (int i = 0; i < longValueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft < 0) {
values[valuesOffset++] = (int)
@ -115,25 +144,28 @@ class BulkOperationPacked extends BulkOperation {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values,
int valuesOffset, int iterations) {
if (bitsPerValue > 32) {
throw new UnsupportedOperationException("Cannot decode " + bitsPerValue + "-bits values into an int[]");
}
int blockBitsLeft = 8;
int valueBitsLeft = bitsPerValue;
int nextValue = 0;
for (int end = valuesOffset + iterations * valueCount; valuesOffset < end; ) {
if (valueBitsLeft > blockBitsLeft) {
nextValue |= (blocks[blocksOffset++] & ((1L << blockBitsLeft) - 1)) << (valueBitsLeft - blockBitsLeft);
valueBitsLeft -= blockBitsLeft;
blockBitsLeft = 8;
int bitsLeft = bitsPerValue;
for (int i = 0; i < iterations * byteBlockCount; ++i) {
final int bytes = blocks[blocksOffset++] & 0xFF;
if (bitsLeft > 8) {
// just buffer
bitsLeft -= 8;
nextValue |= bytes << bitsLeft;
} else {
nextValue |= ((blocks[blocksOffset] & 0xFFL) >>> (blockBitsLeft - valueBitsLeft)) & ((1L << valueBitsLeft) - 1);
values[valuesOffset++] = nextValue;
nextValue = 0;
blockBitsLeft -= valueBitsLeft;
valueBitsLeft = bitsPerValue;
// flush
int bits = 8 - bitsLeft;
values[valuesOffset++] = nextValue | (bytes >>> bits);
while (bits >= bitsPerValue) {
bits -= bitsPerValue;
values[valuesOffset++] = (bytes >>> bits) & intMask;
}
// then buffer
bitsLeft = bitsPerValue - bits;
nextValue = (bytes & ((1 << bits) - 1)) << bitsLeft;
}
}
assert bitsLeft == bitsPerValue;
}
@Override
@ -141,7 +173,7 @@ class BulkOperationPacked extends BulkOperation {
int blocksOffset, int iterations) {
long nextBlock = 0;
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
for (int i = 0; i < longValueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft > 0) {
nextBlock |= values[valuesOffset++] << bitsLeft;
@ -164,7 +196,7 @@ class BulkOperationPacked extends BulkOperation {
int blocksOffset, int iterations) {
long nextBlock = 0;
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
for (int i = 0; i < longValueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft > 0) {
nextBlock |= (values[valuesOffset++] & 0xFFFFFFFFL) << bitsLeft;
@ -185,47 +217,57 @@ class BulkOperationPacked extends BulkOperation {
@Override
public void encode(long[] values, int valuesOffset, byte[] blocks,
int blocksOffset, int iterations) {
long nextBlock = 0;
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft > 0) {
nextBlock |= values[valuesOffset++] << bitsLeft;
} else if (bitsLeft == 0) {
nextBlock |= values[valuesOffset++];
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
nextBlock = 0;
bitsLeft = 64;
} else { // bitsLeft < 0
nextBlock |= values[valuesOffset] >>> -bitsLeft;
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
nextBlock = (values[valuesOffset++] & ((1L << -bitsLeft) - 1)) << (64 + bitsLeft);
bitsLeft += 64;
int nextBlock = 0;
int bitsLeft = 8;
for (int i = 0; i < byteValueCount * iterations; ++i) {
final long v = values[valuesOffset++];
assert bitsPerValue == 64 || PackedInts.bitsRequired(v) <= bitsPerValue;
if (bitsPerValue < bitsLeft) {
// just buffer
nextBlock |= v << (bitsLeft - bitsPerValue);
bitsLeft -= bitsPerValue;
} else {
// flush as many blocks as possible
int bits = bitsPerValue - bitsLeft;
blocks[blocksOffset++] = (byte) (nextBlock | (v >>> bits));
while (bits >= 8) {
bits -= 8;
blocks[blocksOffset++] = (byte) (v >>> bits);
}
// then buffer
bitsLeft = 8 - bits;
nextBlock = (int) ((v & ((1L << bits) - 1)) << bitsLeft);
}
}
assert bitsLeft == 8;
}
@Override
public void encode(int[] values, int valuesOffset, byte[] blocks,
int blocksOffset, int iterations) {
long nextBlock = 0;
int bitsLeft = 64;
for (int i = 0; i < valueCount * iterations; ++i) {
bitsLeft -= bitsPerValue;
if (bitsLeft > 0) {
nextBlock |= (values[valuesOffset++] & 0xFFFFFFFFL) << bitsLeft;
} else if (bitsLeft == 0) {
nextBlock |= (values[valuesOffset++] & 0xFFFFFFFFL);
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
nextBlock = 0;
bitsLeft = 64;
} else { // bitsLeft < 0
nextBlock |= (values[valuesOffset] & 0xFFFFFFFFL) >>> -bitsLeft;
blocksOffset = writeLong(nextBlock, blocks, blocksOffset);
nextBlock = (values[valuesOffset++] & ((1L << -bitsLeft) - 1)) << (64 + bitsLeft);
bitsLeft += 64;
int nextBlock = 0;
int bitsLeft = 8;
for (int i = 0; i < byteValueCount * iterations; ++i) {
final int v = values[valuesOffset++];
assert PackedInts.bitsRequired(v & 0xFFFFFFFFL) <= bitsPerValue;
if (bitsPerValue < bitsLeft) {
// just buffer
nextBlock |= v << (bitsLeft - bitsPerValue);
bitsLeft -= bitsPerValue;
} else {
// flush as many blocks as possible
int bits = bitsPerValue - bitsLeft;
blocks[blocksOffset++] = (byte) (nextBlock | (v >>> bits));
while (bits >= 8) {
bits -= 8;
blocks[blocksOffset++] = (byte) (v >>> bits);
}
// then buffer
bitsLeft = 8 - bits;
nextBlock = (v & ((1 << bits) - 1)) << bitsLeft;
}
}
assert bitsLeft == 8;
}
}
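To make the new byte-aligned bookkeeping concrete, take bitsPerValue = 10. The constructor first reduces 10 to longBlockCount = 5 (5 longs = 320 bits hold exactly longValueCount = 32 values), then starts from byteBlockCount = 40 and byteValueCount = 32 and halves both while they stay even, ending at byteBlockCount = 5 and byteValueCount = 4: five bytes (40 bits) hold exactly four 10-bit values. The byte-oriented decode and encode loops above therefore iterate iterations * byteBlockCount (or byteValueCount) times, one byte or one value per step, instead of the old 8 * iterations, which assumed whole 64-bit blocks.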

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked1 extends BulkOperationPacked {
public BulkOperationPacked1() {
super(1);
assert blockCount() == 1;
assert valueCount() == 64;
}
@Override
@ -42,7 +40,7 @@ final class BulkOperationPacked1 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 7) & 1;
values[valuesOffset++] = (block >>> 6) & 1;
@ -67,7 +65,7 @@ final class BulkOperationPacked1 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 7) & 1;
values[valuesOffset++] = (block >>> 6) & 1;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked10 extends BulkOperationPacked {
public BulkOperationPacked10() {
super(10);
assert blockCount() == 5;
assert valueCount() == 32;
}
@Override
@ -75,7 +73,7 @@ final class BulkOperationPacked10 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 2) | (byte1 >>> 6);
@ -133,7 +131,7 @@ final class BulkOperationPacked10 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 2) | (byte1 >>> 6);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked11 extends BulkOperationPacked {
public BulkOperationPacked11() {
super(11);
assert blockCount() == 11;
assert valueCount() == 64;
}
@Override
@ -113,7 +111,7 @@ final class BulkOperationPacked11 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 3) | (byte1 >>> 5);
@ -219,7 +217,7 @@ final class BulkOperationPacked11 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 3) | (byte1 >>> 5);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked12 extends BulkOperationPacked {
public BulkOperationPacked12() {
super(12);
assert blockCount() == 3;
assert valueCount() == 16;
}
@Override
@ -57,7 +55,7 @@ final class BulkOperationPacked12 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 4) | (byte1 >>> 4);
@ -93,7 +91,7 @@ final class BulkOperationPacked12 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 4) | (byte1 >>> 4);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked13 extends BulkOperationPacked {
public BulkOperationPacked13() {
super(13);
assert blockCount() == 13;
assert valueCount() == 64;
}
@Override
@ -115,7 +113,7 @@ final class BulkOperationPacked13 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 5) | (byte1 >>> 3);
@ -225,7 +223,7 @@ final class BulkOperationPacked13 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 5) | (byte1 >>> 3);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked14 extends BulkOperationPacked {
public BulkOperationPacked14() {
super(14);
assert blockCount() == 7;
assert valueCount() == 32;
}
@Override
@ -77,7 +75,7 @@ final class BulkOperationPacked14 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 6) | (byte1 >>> 2);
@ -139,7 +137,7 @@ final class BulkOperationPacked14 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 6) | (byte1 >>> 2);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked15 extends BulkOperationPacked {
public BulkOperationPacked15() {
super(15);
assert blockCount() == 15;
assert valueCount() == 64;
}
@Override
@ -117,7 +115,7 @@ final class BulkOperationPacked15 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 7) | (byte1 >>> 1);
@ -231,7 +229,7 @@ final class BulkOperationPacked15 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 7) | (byte1 >>> 1);

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked16 extends BulkOperationPacked {
public BulkOperationPacked16() {
super(16);
assert blockCount() == 1;
assert valueCount() == 4;
}
@Override
@ -42,7 +40,7 @@ final class BulkOperationPacked16 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 4 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
values[valuesOffset++] = ((blocks[blocksOffset++] & 0xFF) << 8) | (blocks[blocksOffset++] & 0xFF);
}
}
@ -59,7 +57,7 @@ final class BulkOperationPacked16 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 4 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
values[valuesOffset++] = ((blocks[blocksOffset++] & 0xFFL) << 8) | (blocks[blocksOffset++] & 0xFFL);
}
}

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked17 extends BulkOperationPacked {
public BulkOperationPacked17() {
super(17);
assert blockCount() == 17;
assert valueCount() == 64;
}
@Override
@ -119,7 +117,7 @@ final class BulkOperationPacked17 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -237,7 +235,7 @@ final class BulkOperationPacked17 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked18 extends BulkOperationPacked {
public BulkOperationPacked18() {
super(18);
assert blockCount() == 9;
assert valueCount() == 32;
}
@Override
@ -79,7 +77,7 @@ final class BulkOperationPacked18 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -145,7 +143,7 @@ final class BulkOperationPacked18 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked19 extends BulkOperationPacked {
public BulkOperationPacked19() {
super(19);
assert blockCount() == 19;
assert valueCount() == 64;
}
@Override
@ -121,7 +119,7 @@ final class BulkOperationPacked19 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -243,7 +241,7 @@ final class BulkOperationPacked19 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked2 extends BulkOperationPacked {
public BulkOperationPacked2() {
super(2);
assert blockCount() == 1;
assert valueCount() == 32;
}
@Override
@ -42,7 +40,7 @@ final class BulkOperationPacked2 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 6) & 3;
values[valuesOffset++] = (block >>> 4) & 3;
@ -63,7 +61,7 @@ final class BulkOperationPacked2 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 6) & 3;
values[valuesOffset++] = (block >>> 4) & 3;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked20 extends BulkOperationPacked {
public BulkOperationPacked20() {
super(20);
assert blockCount() == 5;
assert valueCount() == 16;
}
@Override
@ -59,7 +57,7 @@ final class BulkOperationPacked20 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -99,7 +97,7 @@ final class BulkOperationPacked20 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked21 extends BulkOperationPacked {
public BulkOperationPacked21() {
super(21);
assert blockCount() == 21;
assert valueCount() == 64;
}
@Override
@ -123,7 +121,7 @@ final class BulkOperationPacked21 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -249,7 +247,7 @@ final class BulkOperationPacked21 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked22 extends BulkOperationPacked {
public BulkOperationPacked22() {
super(22);
assert blockCount() == 11;
assert valueCount() == 32;
}
@Override
@ -81,7 +79,7 @@ final class BulkOperationPacked22 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -151,7 +149,7 @@ final class BulkOperationPacked22 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked23 extends BulkOperationPacked {
public BulkOperationPacked23() {
super(23);
assert blockCount() == 23;
assert valueCount() == 64;
}
@Override
@ -125,7 +123,7 @@ final class BulkOperationPacked23 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -255,7 +253,7 @@ final class BulkOperationPacked23 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked24 extends BulkOperationPacked {
public BulkOperationPacked24() {
super(24);
assert blockCount() == 3;
assert valueCount() == 8;
}
@Override
@ -49,7 +47,7 @@ final class BulkOperationPacked24 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
final int byte2 = blocks[blocksOffset++] & 0xFF;
@ -76,7 +74,7 @@ final class BulkOperationPacked24 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
final long byte2 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked3 extends BulkOperationPacked {
public BulkOperationPacked3() {
super(3);
assert blockCount() == 3;
assert valueCount() == 64;
}
@Override
@ -105,7 +103,7 @@ final class BulkOperationPacked3 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 5;
values[valuesOffset++] = (byte0 >>> 2) & 7;
@ -195,7 +193,7 @@ final class BulkOperationPacked3 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 5;
values[valuesOffset++] = (byte0 >>> 2) & 7;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked4 extends BulkOperationPacked {
public BulkOperationPacked4() {
super(4);
assert blockCount() == 1;
assert valueCount() == 16;
}
@Override
@ -42,7 +40,7 @@ final class BulkOperationPacked4 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 4) & 15;
values[valuesOffset++] = block & 15;
@ -61,7 +59,7 @@ final class BulkOperationPacked4 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
final byte block = blocks[blocksOffset++];
values[valuesOffset++] = (block >>> 4) & 15;
values[valuesOffset++] = block & 15;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked5 extends BulkOperationPacked {
public BulkOperationPacked5() {
super(5);
assert blockCount() == 5;
assert valueCount() == 64;
}
@Override
@ -107,7 +105,7 @@ final class BulkOperationPacked5 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 3;
final int byte1 = blocks[blocksOffset++] & 0xFF;
@ -201,7 +199,7 @@ final class BulkOperationPacked5 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 3;
final long byte1 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked6 extends BulkOperationPacked {
public BulkOperationPacked6() {
super(6);
assert blockCount() == 3;
assert valueCount() == 32;
}
@Override
@ -73,7 +71,7 @@ final class BulkOperationPacked6 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 2;
final int byte1 = blocks[blocksOffset++] & 0xFF;
@ -127,7 +125,7 @@ final class BulkOperationPacked6 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 2;
final long byte1 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked7 extends BulkOperationPacked {
public BulkOperationPacked7() {
super(7);
assert blockCount() == 7;
assert valueCount() == 64;
}
@Override
@ -109,7 +107,7 @@ final class BulkOperationPacked7 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 1;
final int byte1 = blocks[blocksOffset++] & 0xFF;
@ -207,7 +205,7 @@ final class BulkOperationPacked7 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = byte0 >>> 1;
final long byte1 = blocks[blocksOffset++] & 0xFF;

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked8 extends BulkOperationPacked {
public BulkOperationPacked8() {
super(8);
assert blockCount() == 1;
assert valueCount() == 8;
}
@Override
@ -42,7 +40,7 @@ final class BulkOperationPacked8 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;
}
}
@ -59,7 +57,7 @@ final class BulkOperationPacked8 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int j = 0; j < 8 * iterations; ++j) {
for (int j = 0; j < iterations; ++j) {
values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;
}
}

View File

@ -26,8 +26,6 @@ final class BulkOperationPacked9 extends BulkOperationPacked {
public BulkOperationPacked9() {
super(9);
assert blockCount() == 9;
assert valueCount() == 64;
}
@Override
@ -111,7 +109,7 @@ final class BulkOperationPacked9 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, int[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final int byte0 = blocks[blocksOffset++] & 0xFF;
final int byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 1) | (byte1 >>> 7);
@ -213,7 +211,7 @@ final class BulkOperationPacked9 extends BulkOperationPacked {
@Override
public void decode(byte[] blocks, int blocksOffset, long[] values, int valuesOffset, int iterations) {
for (int i = 0; i < 8 * iterations; ++i) {
for (int i = 0; i < iterations; ++i) {
final long byte0 = blocks[blocksOffset++] & 0xFF;
final long byte1 = blocks[blocksOffset++] & 0xFF;
values[valuesOffset++] = (byte0 << 1) | (byte1 >>> 7);

View File

@ -35,12 +35,22 @@ final class BulkOperationPackedSingleBlock extends BulkOperation {
}
@Override
public final int blockCount() {
public final int longBlockCount() {
return BLOCK_COUNT;
}
@Override
public int valueCount() {
public final int byteBlockCount() {
return BLOCK_COUNT * 8;
}
@Override
public int longValueCount() {
return valueCount;
}
@Override
public final int byteValueCount() {
return valueCount;
}
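
For the single-block format the long- and byte-oriented counts are tied together: one 64-bit block is also eight byte blocks, and both hold the same number of values. A worked instance (numbers derived from the definitions above, not read from the API):

// PACKED_SINGLE_BLOCK with bitsPerValue = 21: a 64-bit block holds
// 64 / 21 = 3 values, so
//   longBlockCount() == 1    byteBlockCount() == 8
//   longValueCount() == 3    byteValueCount() == 3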

View File

@ -140,9 +140,9 @@ class Packed64 extends PackedInts.MutableImpl {
final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
// go to the next block where the value does not span across two blocks
final int offsetInBlocks = index % decoder.valueCount();
final int offsetInBlocks = index % decoder.longValueCount();
if (offsetInBlocks != 0) {
for (int i = offsetInBlocks; i < decoder.valueCount() && len > 0; ++i) {
for (int i = offsetInBlocks; i < decoder.longValueCount() && len > 0; ++i) {
arr[off++] = get(index++);
--len;
}
@ -152,12 +152,12 @@ class Packed64 extends PackedInts.MutableImpl {
}
// bulk get
assert index % decoder.valueCount() == 0;
assert index % decoder.longValueCount() == 0;
int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
final int iterations = len / decoder.valueCount();
final int iterations = len / decoder.longValueCount();
decoder.decode(blocks, blockIndex, arr, off, iterations);
final int gotValues = iterations * decoder.valueCount();
final int gotValues = iterations * decoder.longValueCount();
index += gotValues;
len -= gotValues;
assert len >= 0;
@ -204,9 +204,9 @@ class Packed64 extends PackedInts.MutableImpl {
final PackedInts.Encoder encoder = BulkOperation.of(PackedInts.Format.PACKED, bitsPerValue);
// go to the next block where the value does not span across two blocks
final int offsetInBlocks = index % encoder.valueCount();
final int offsetInBlocks = index % encoder.longValueCount();
if (offsetInBlocks != 0) {
for (int i = offsetInBlocks; i < encoder.valueCount() && len > 0; ++i) {
for (int i = offsetInBlocks; i < encoder.longValueCount() && len > 0; ++i) {
set(index++, arr[off++]);
--len;
}
@ -216,12 +216,12 @@ class Packed64 extends PackedInts.MutableImpl {
}
// bulk set
assert index % encoder.valueCount() == 0;
assert index % encoder.longValueCount() == 0;
int blockIndex = (int) ((long) index * bitsPerValue) >>> BLOCK_BITS;
assert (((long)index * bitsPerValue) & MOD_MASK) == 0;
final int iterations = len / encoder.valueCount();
final int iterations = len / encoder.longValueCount();
encoder.encode(arr, off, blocks, blockIndex, iterations);
final int setValues = iterations * encoder.valueCount();
final int setValues = iterations * encoder.longValueCount();
index += setValues;
len -= setValues;
assert len >= 0;

View File

@ -92,8 +92,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk get
assert index % valuesPerBlock == 0;
final PackedInts.Decoder decoder = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
assert decoder.blockCount() == 1;
assert decoder.valueCount() == valuesPerBlock;
assert decoder.longBlockCount() == 1;
assert decoder.longValueCount() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
decoder.decode(blocks, blockIndex, arr, off, nblocks);
@ -136,8 +136,8 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
// bulk set
assert index % valuesPerBlock == 0;
final BulkOperation op = BulkOperation.of(PackedInts.Format.PACKED_SINGLE_BLOCK, bitsPerValue);
assert op.blockCount() == 1;
assert op.valueCount() == valuesPerBlock;
assert op.longBlockCount() == 1;
assert op.longValueCount() == valuesPerBlock;
final int blockIndex = index / valuesPerBlock;
final int nblocks = (index + len) / valuesPerBlock - blockIndex;
op.encode(arr, off, blocks, blockIndex, nblocks);

View File

@ -280,15 +280,28 @@ public class PackedInts {
public static interface Decoder {
/**
* The minimum number of long blocks to decode in a single call.
* The minimum number of long blocks to decode in a single iteration, when
* using long decoding.
*/
int blockCount();
int longBlockCount();
/**
* The number of values that can be stored in <code>blockCount()</code> long
* The number of values that can be stored in {@link #longBlockCount()} long
* blocks.
*/
int valueCount();
int longValueCount();
/**
* The minimum number of byte blocks to decode in a single iteration, when
* using byte decoding.
*/
int byteBlockCount();
/**
* The number of values that can be stored in {@link #byteBlockCount()} byte
* blocks.
*/
int byteValueCount();
/**
* Read <code>iterations * blockCount()</code> blocks from <code>blocks</code>,
@ -350,15 +363,28 @@ public class PackedInts {
public static interface Encoder {
/**
* The minimum number of long blocks to encode in a single call.
* The minimum number of long blocks to encode in a single iteration, when
* using long encoding.
*/
int blockCount();
int longBlockCount();
/**
* The number of values that can be stored in <code>blockCount()</code> long
* The number of values that can be stored in {@link #longBlockCount()} long
* blocks.
*/
int valueCount();
int longValueCount();
/**
* The minimum number of byte blocks to encode in a single iteration, when
* using byte encoding.
*/
int byteBlockCount();
/**
* The number of values that can be stored in {@link #byteBlockCount()} byte
* blocks.
*/
int byteValueCount();
/**
* Read <code>iterations * valueCount()</code> values from <code>values</code>,
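
To make the four counts concrete, a worked example for the PACKED format (the numbers follow from the block/value arithmetic, not from any API call):

// bitsPerValue = 20:
//   longBlockCount() == 5   // lcm(20, 64) = 320 bits = 5 longs
//   longValueCount() == 16  // 320 / 20
//   byteBlockCount() == 5   // lcm(20, 8) = 40 bits = 5 bytes
//   byteValueCount() == 2   // 40 / 20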

View File

@ -39,14 +39,23 @@ final class PackedReaderIterator extends PackedInts.ReaderIteratorImpl {
this.format = format;
this.packedIntsVersion = packedIntsVersion;
bulkOperation = BulkOperation.of(format, bitsPerValue);
iterations = bulkOperation.computeIterations(valueCount, mem);
iterations = iterations(mem);
assert valueCount == 0 || iterations > 0;
nextBlocks = new byte[8 * iterations * bulkOperation.blockCount()];
nextValues = new LongsRef(new long[iterations * bulkOperation.valueCount()], 0, 0);
nextBlocks = new byte[iterations * bulkOperation.byteBlockCount()];
nextValues = new LongsRef(new long[iterations * bulkOperation.byteValueCount()], 0, 0);
nextValues.offset = nextValues.longs.length;
position = -1;
}
private int iterations(int mem) {
int iterations = bulkOperation.computeIterations(valueCount, mem);
if (packedIntsVersion < PackedInts.VERSION_BYTE_ALIGNED) {
// make sure iterations is a multiple of 8
iterations = (iterations + 7) & 0xFFFFFFF8;
}
return iterations;
}
@Override
public LongsRef next(int count) throws IOException {
assert nextValues.length >= 0;
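
The masking in iterations() rounds up to the next multiple of 8:

int rounded = (n + 7) & 0xFFFFFFF8; // 1 -> 8, 7 -> 8, 8 -> 8, 9 -> 16

Eight byte-oriented iterations consume a whole number of 64-bit blocks, which is presumably why streams written before VERSION_BYTE_ALIGNED require the rounding.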

View File

@ -42,8 +42,8 @@ final class PackedWriter extends PackedInts.Writer {
this.format = format;
encoder = BulkOperation.of(format, bitsPerValue);
iterations = encoder.computeIterations(valueCount, mem);
nextBlocks = new byte[8 * iterations * encoder.blockCount()];
nextValues = new long[iterations * encoder.valueCount()];
nextBlocks = new byte[iterations * encoder.byteBlockCount()];
nextValues = new long[iterations * encoder.byteValueCount()];
off = 0;
written = 0;
finished = false;

View File

@ -57,28 +57,28 @@ FOOTER="""
* For every number of bits per value, there is a minimum number of
* blocks (b) / values (v) you need to write in order to reach the next block
* boundary:
* - 16 bits per value -> b=1, v=4
* - 24 bits per value -> b=3, v=8
* - 50 bits per value -> b=25, v=32
* - 63 bits per value -> b=63, v=64
* - 16 bits per value -> b=2, v=1
* - 24 bits per value -> b=3, v=1
* - 50 bits per value -> b=25, v=4
* - 63 bits per value -> b=63, v=8
* - ...
*
* A bulk read consists of copying <code>iterations*v</code> values that are
* contained in <code>iterations*b</code> blocks into a <code>long[]</code>
* (higher values of <code>iterations</code> are likely to yield a better
* throughput) => this requires n * (b + v) longs in memory.
* throughput) => this requires n * (b + 8v) bytes of memory.
*
* This method computes <code>iterations</code> as
* <code>ramBudget / (8 * (b + v))</code> (since a long is 8 bytes).
* <code>ramBudget / (b + 8v)</code> (since a long is 8 bytes).
*/
public final int computeIterations(int valueCount, int ramBudget) {
final int iterations = (ramBudget >>> 3) / (blockCount() + valueCount());
final int iterations = ramBudget / (byteBlockCount() + 8 * byteValueCount());
if (iterations == 0) {
// at least 1
return 1;
} else if ((iterations - 1) * blockCount() >= valueCount) {
} else if ((iterations - 1) * byteValueCount() >= valueCount) {
// don't allocate for more than the size of the reader
return (int) Math.ceil((double) valueCount / valueCount());
return (int) Math.ceil((double) valueCount / byteValueCount());
} else {
return iterations;
}
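
Plugging numbers into computeIterations() (values assumed, for illustration only):

// bitsPerValue = 20  =>  byteBlockCount() = 5, byteValueCount() = 2
// one iteration costs 5 + 8 * 2 = 21 bytes
// ramBudget = 1024   =>  1024 / 21 = 48 iterations
//                    =>  48 * 2 = 96 values per bulk call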
@ -131,14 +131,11 @@ def block_value_count(bpv, bits=64):
return (blocks, values)
def packed64(bpv, f):
blocks, values = block_value_count(bpv)
mask = (1 << bpv) - 1
f.write("\n")
f.write(" public BulkOperationPacked%d() {\n" %bpv)
f.write(" super(%d);\n" %bpv)
f.write(" assert blockCount() == %d;\n" %blocks)
f.write(" assert valueCount() == %d;\n" %values)
f.write(" }\n\n")
if bpv == 64:
@ -215,20 +212,19 @@ def p64_decode(bpv, f, bits):
if bits < bpv:
f.write(" throw new UnsupportedOperationException();\n")
else:
if is_power_of_two(bpv) and bpv < 8:
f.write(" for (int j = 0; j < 8 * iterations; ++j) {\n")
f.write(" for (int j = 0; j < iterations; ++j) {\n")
f.write(" final byte block = blocks[blocksOffset++];\n")
for shift in xrange(8 - bpv, 0, -bpv):
f.write(" values[valuesOffset++] = (block >>> %d) & %d;\n" %(shift, mask))
f.write(" values[valuesOffset++] = block & %d;\n" %mask)
f.write(" }\n")
elif bpv == 8:
f.write(" for (int j = 0; j < 8 * iterations; ++j) {\n")
f.write(" for (int j = 0; j < iterations; ++j) {\n")
f.write(" values[valuesOffset++] = blocks[blocksOffset++] & 0xFF;\n")
f.write(" }\n")
elif is_power_of_two(bpv) and bpv > 8:
f.write(" for (int j = 0; j < %d * iterations; ++j) {\n" %(64 / bpv))
f.write(" for (int j = 0; j < iterations; ++j) {\n")
m = bits <= 32 and "0xFF" or "0xFFL"
f.write(" values[valuesOffset++] =")
for i in xrange(bpv / 8 - 1):
@ -236,7 +232,7 @@ def p64_decode(bpv, f, bits):
f.write(" (blocks[blocksOffset++] & %s);\n" %m)
f.write(" }\n")
else:
f.write(" for (int i = 0; i < 8 * iterations; ++i) {\n")
f.write(" for (int i = 0; i < iterations; ++i) {\n")
for i in xrange(0, byte_values):
byte_start = i * bpv / 8
bit_start = (i * bpv) % 8

View File

@ -212,7 +212,7 @@ public class TestPackedInts extends LuceneTestCase {
if (!format.isSupported(bpv)) {
continue;
}
final long byteCount = format.byteCount(version, valueCount, bpv);
final long byteCount = format.byteCount(version, valueCount, bpv);
String msg = "format=" + format + ",version=" + version + ",valueCount=" + valueCount + ",bpv=" + bpv;
// test iterator
@ -706,16 +706,22 @@ public class TestPackedInts extends LuceneTestCase {
final PackedInts.Encoder encoder = PackedInts.getEncoder(format, PackedInts.VERSION_CURRENT, bpv);
final PackedInts.Decoder decoder = PackedInts.getDecoder(format, PackedInts.VERSION_CURRENT, bpv);
final int blockCount = encoder.blockCount();
final int valueCount = encoder.valueCount();
assertEquals(blockCount, decoder.blockCount());
assertEquals(valueCount, decoder.valueCount());
final int longBlockCount = encoder.longBlockCount();
final int longValueCount = encoder.longValueCount();
final int byteBlockCount = encoder.byteBlockCount();
final int byteValueCount = encoder.byteValueCount();
assertEquals(longBlockCount, decoder.longBlockCount());
assertEquals(longValueCount, decoder.longValueCount());
assertEquals(byteBlockCount, decoder.byteBlockCount());
assertEquals(byteValueCount, decoder.byteValueCount());
final int iterations = random().nextInt(100);
final int longIterations = random().nextInt(100);
final int byteIterations = longIterations * longValueCount / byteValueCount;
assertEquals(longIterations * longValueCount, byteIterations * byteValueCount);
final int blocksOffset = random().nextInt(100);
final int valuesOffset = random().nextInt(100);
final int blocksOffset2 = random().nextInt(100);
final int blocksLen = iterations * blockCount;
final int blocksLen = longIterations * longBlockCount;
// 1. generate random inputs
final long[] blocks = new long[blocksOffset + blocksLen];
@ -729,8 +735,8 @@ public class TestPackedInts extends LuceneTestCase {
}
// 2. decode
final long[] values = new long[valuesOffset + iterations * valueCount];
decoder.decode(blocks, blocksOffset, values, valuesOffset, iterations);
final long[] values = new long[valuesOffset + longIterations * longValueCount];
decoder.decode(blocks, blocksOffset, values, valuesOffset, longIterations);
for (long value : values) {
assertTrue(value <= PackedInts.maxValue(bpv));
}
@ -738,7 +744,7 @@ public class TestPackedInts extends LuceneTestCase {
final int[] intValues;
if (bpv <= 32) {
intValues = new int[values.length];
decoder.decode(blocks, blocksOffset, intValues, valuesOffset, iterations);
decoder.decode(blocks, blocksOffset, intValues, valuesOffset, longIterations);
assertTrue(equals(intValues, values));
} else {
intValues = null;
@ -746,21 +752,21 @@ public class TestPackedInts extends LuceneTestCase {
// 3. re-encode
final long[] blocks2 = new long[blocksOffset2 + blocksLen];
encoder.encode(values, valuesOffset, blocks2, blocksOffset2, iterations);
encoder.encode(values, valuesOffset, blocks2, blocksOffset2, longIterations);
assertArrayEquals(msg, Arrays.copyOfRange(blocks, blocksOffset, blocks.length),
Arrays.copyOfRange(blocks2, blocksOffset2, blocks2.length));
// test encoding from int[]
if (bpv <= 32) {
final long[] blocks3 = new long[blocks2.length];
encoder.encode(intValues, valuesOffset, blocks3, blocksOffset2, iterations);
encoder.encode(intValues, valuesOffset, blocks3, blocksOffset2, longIterations);
assertArrayEquals(msg, blocks2, blocks3);
}
// 4. byte[] decoding
final byte[] byteBlocks = new byte[8 * blocks.length];
ByteBuffer.wrap(byteBlocks).asLongBuffer().put(blocks);
final long[] values2 = new long[valuesOffset + iterations * valueCount];
decoder.decode(byteBlocks, blocksOffset * 8, values2, valuesOffset, iterations);
final long[] values2 = new long[valuesOffset + longIterations * longValueCount];
decoder.decode(byteBlocks, blocksOffset * 8, values2, valuesOffset, byteIterations);
for (long value : values2) {
assertTrue(msg, value <= PackedInts.maxValue(bpv));
}
@ -768,18 +774,18 @@ public class TestPackedInts extends LuceneTestCase {
// test decoding to int[]
if (bpv <= 32) {
final int[] intValues2 = new int[values2.length];
decoder.decode(byteBlocks, blocksOffset * 8, intValues2, valuesOffset, iterations);
decoder.decode(byteBlocks, blocksOffset * 8, intValues2, valuesOffset, byteIterations);
assertTrue(msg, equals(intValues2, values2));
}
// 5. byte[] encoding
final byte[] blocks3 = new byte[8 * (blocksOffset2 + blocksLen)];
encoder.encode(values, valuesOffset, blocks3, 8 * blocksOffset2, iterations);
encoder.encode(values, valuesOffset, blocks3, 8 * blocksOffset2, byteIterations);
assertEquals(msg, LongBuffer.wrap(blocks2), ByteBuffer.wrap(blocks3).asLongBuffer());
// test encoding from int[]
if (bpv <= 32) {
final byte[] blocks4 = new byte[blocks3.length];
encoder.encode(intValues, valuesOffset, blocks4, 8 * blocksOffset2, iterations);
encoder.encode(intValues, valuesOffset, blocks4, 8 * blocksOffset2, byteIterations);
assertArrayEquals(msg, blocks3, blocks4);
}
}

View File

@ -1,7 +1,6 @@
package org.apache.lucene.facet.taxonomy;
import org.apache.lucene.util.Constants;
import java.util.Arrays;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -28,10 +27,6 @@ import org.apache.lucene.util.Constants;
*/
public class CategoryPath implements Comparable<CategoryPath> {
// TODO: revisit when IBM releases Java 7 newer than SR3 (with a fix)
// to validate, run e.g. TestAssociationExample with -Dtests.iters=1000
private static final boolean IS_J9_JAVA7 = Constants.JRE_IS_MINIMUM_JAVA7 && Constants.JVM_VENDOR.contains("IBM");
/** An empty {@link CategoryPath} */
public static final CategoryPath EMPTY = new CategoryPath();
@ -48,7 +43,7 @@ public class CategoryPath implements Comparable<CategoryPath> {
// Used by singleton EMPTY
private CategoryPath() {
components = new String[0];
components = null;
length = 0;
}
@ -67,16 +62,12 @@ public class CategoryPath implements Comparable<CategoryPath> {
/** Construct from the given path components. */
public CategoryPath(final String... components) {
assert components.length > 0 : "use CategoryPath.EMPTY to create an empty path";
if (IS_J9_JAVA7) {
// On IBM J9 Java 1.7.0, if we do 'this.components = components', then
// at some point its length becomes 0 ... quite unexpectedly. If JIT is
// disabled, it doesn't happen. This bypasses the bug by copying the
// array (note, Arrays.copyOf did not help either!).
this.components = new String[components.length];
System.arraycopy(components, 0, this.components, 0, components.length);
} else {
this.components = components;
for (String comp : components) {
if (comp == null || comp.isEmpty()) {
throw new IllegalArgumentException("empty or null components not allowed: " + Arrays.toString(components));
}
}
this.components = components;
length = components.length;
}
@ -84,9 +75,14 @@ public class CategoryPath implements Comparable<CategoryPath> {
public CategoryPath(final String pathString, final char delimiter) {
String[] comps = pathString.split(Character.toString(delimiter));
if (comps.length == 1 && comps[0].isEmpty()) {
components = EMPTY.components;
components = null;
length = 0;
} else {
for (String comp : comps) {
if (comp == null || comp.isEmpty()) {
throw new IllegalArgumentException("empty or null components not allowed: " + Arrays.toString(comps));
}
}
components = comps;
length = components.length;
}
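
A usage sketch of the new validation (paths illustrative):

new CategoryPath("a", "b", "c");  // ok
new CategoryPath("a/b/c", '/');   // ok
new CategoryPath("a", "", "c");   // throws IllegalArgumentException
new CategoryPath("a//c", '/');    // throws IllegalArgumentException: empty middle component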

View File

@ -1,5 +1,7 @@
package org.apache.lucene.facet.taxonomy;
import java.util.Arrays;
import org.apache.lucene.facet.FacetTestCase;
import org.junit.Test;
@ -173,9 +175,46 @@ public class TestCategoryPath extends FacetTestCase {
pother = new CategoryPath("a/b/c/e", '/');
assertTrue(pother.compareTo(p) > 0);
assertTrue(p.compareTo(pother) < 0);
pother = new CategoryPath("a/b/c//e", '/');
assertTrue(pother.compareTo(p) < 0);
assertTrue(p.compareTo(pother) > 0);
}
@Test
public void testEmptyNullComponents() throws Exception {
// LUCENE-4724: CategoryPath should not allow empty or null components
String[][] components_tests = new String[][] {
new String[] { "", "test" }, // empty in the beginning
new String[] { "test", "" }, // empty in the end
new String[] { "test", "", "foo" }, // empty in the middle
new String[] { null, "test" }, // null at the beginning
new String[] { "test", null }, // null in the end
new String[] { "test", null, "foo" }, // null in the middle
};
for (String[] components : components_tests) {
try {
assertNotNull(new CategoryPath(components));
fail("empty or null components should not be allowed: " + Arrays.toString(components));
} catch (IllegalArgumentException e) {
// ok
}
}
String[] path_tests = new String[] {
"/test", // empty in the beginning
"test//foo", // empty in the middle
};
for (String path : path_tests) {
try {
assertNotNull(new CategoryPath(path, '/'));
fail("empty or null components should not be allowed: " + path);
} catch (IllegalArgumentException e) {
// ok
}
}
// a trailing path separator produces only one component
assertNotNull(new CategoryPath("test/", '/'));
}
}

View File

@ -56,6 +56,12 @@ public class TestCompactLabelToOrdinal extends FacetTestCase {
.onUnmappableCharacter(CodingErrorAction.REPLACE)
.onMalformedInput(CodingErrorAction.REPLACE);
uniqueValues[i] = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString();
// we cannot have empty path components, so collapse consecutive
// delimiter chars and strip any leading delimiter.
uniqueValues[i] = uniqueValues[i].replaceAll("/+", "/");
if (uniqueValues[i].startsWith("/")) {
uniqueValues[i] = uniqueValues[i].substring(1);
}
if (uniqueValues[i].indexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1) {
i++;
}
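
The cleanup above in action (input assumed):

// "//a///b".replaceAll("/+", "/")  ->  "/a/b"
// stripping the leading '/'        ->  "a/b"  (no empty components left)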

View File

@ -71,7 +71,7 @@ public final class JoinUtil {
case None:
TermsCollector termsCollector = TermsCollector.create(fromField, multipleValuesPerDocument);
fromSearcher.search(fromQuery, termsCollector);
return new TermsQuery(toField, termsCollector.getCollectorTerms());
return new TermsQuery(toField, fromQuery, termsCollector.getCollectorTerms());
case Total:
case Max:
case Avg:

View File

@ -92,6 +92,35 @@ class TermsIncludingScoreQuery extends Query {
}
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
} if (!super.equals(obj)) {
return false;
} if (getClass() != obj.getClass()) {
return false;
}
TermsIncludingScoreQuery other = (TermsIncludingScoreQuery) obj;
if (!field.equals(other.field)) {
return false;
}
if (!unwrittenOriginalQuery.equals(other.unwrittenOriginalQuery)) {
return false;
}
return true;
}
@Override
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
result += prime * field.hashCode();
result += prime * unwrittenOriginalQuery.hashCode();
return result;
}
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
final Weight originalWeight = originalQuery.createWeight(searcher);

View File

@ -21,6 +21,7 @@ import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
@ -37,13 +38,15 @@ import java.util.Comparator;
class TermsQuery extends MultiTermQuery {
private final BytesRefHash terms;
private final Query fromQuery; // Used for equals() only
/**
* @param field The field that should contain terms that are specified in the previous parameter
* @param terms The terms that matching documents should have. The terms must be sorted by natural order.
*/
TermsQuery(String field, BytesRefHash terms) {
TermsQuery(String field, Query fromQuery, BytesRefHash terms) {
super(field);
this.fromQuery = fromQuery;
this.terms = terms;
}
@ -63,6 +66,31 @@ class TermsQuery extends MultiTermQuery {
'}';
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
} if (!super.equals(obj)) {
return false;
} if (getClass() != obj.getClass()) {
return false;
}
TermsQuery other = (TermsQuery) obj;
if (!fromQuery.equals(other.fromQuery)) {
return false;
}
return true;
}
@Override
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
result += prime * fromQuery.hashCode();
return result;
}
static class SeekingTermSetTermsEnum extends FilteredTermsEnum {
private final BytesRefHash terms;
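
Since the collected BytesRefHash has no meaningful equals(), the originating fromQuery (together with the field compared by MultiTermQuery) acts as a proxy identity for the term set. A sketch of what the override buys (names and the ScoreMode.None path assumed from the JoinUtil hunk above):

Query join1 = JoinUtil.createJoinQuery("from", false, "to", fromQuery, searcher, ScoreMode.None);
Query join2 = JoinUtil.createJoinQuery("from", false, "to", fromQuery, searcher, ScoreMode.None);
assert join1.equals(join2) && join1.hashCode() == join2.hashCode();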

View File

@ -74,7 +74,7 @@ public class CommonTermsQuery extends Query {
protected final Occur highFreqOccur;
protected float lowFreqBoost = 1.0f;
protected float highFreqBoost = 1.0f;
protected int minNrShouldMatch = 0;
protected float minNrShouldMatch = 0;
/**
* Creates a new {@link CommonTermsQuery}
@ -84,7 +84,7 @@ public class CommonTermsQuery extends Query {
* @param lowFreqOccur
* {@link Occur} used for low frequency terms
* @param maxTermFrequency
* a value in [0..1] (or absolute number >=1) representing the
* a value in [0..1) (or absolute number >=1) representing the
* maximum threshold of a term's document frequency to be considered a
* low frequency term.
* @throws IllegalArgumentException
@ -104,7 +104,7 @@ public class CommonTermsQuery extends Query {
* @param lowFreqOccur
* {@link Occur} used for low frequency terms
* @param maxTermFrequency
* a value in [0..1] (or absolute number >=1) representing the
* a value in [0..1) (or absolute number >=1) representing the
* maximum threshold of a term's document frequency to be considered a
* low frequency term.
* @param disableCoord
@ -160,15 +160,19 @@ public class CommonTermsQuery extends Query {
return buildQuery(maxDoc, contextArray, queryTerms);
}
protected int calcLowFreqMinimumNumberShouldMatch(int numOptional) {
if (minNrShouldMatch >= 1.0f || minNrShouldMatch == 0.0f) {
return (int) minNrShouldMatch;
}
return (int) (Math.round(minNrShouldMatch * numOptional));
}
protected Query buildQuery(final int maxDoc,
final TermContext[] contextArray, final Term[] queryTerms) {
BooleanQuery lowFreq = new BooleanQuery(disableCoord);
BooleanQuery highFreq = new BooleanQuery(disableCoord);
highFreq.setBoost(highFreqBoost);
lowFreq.setBoost(lowFreqBoost);
if (lowFreqOccur == Occur.SHOULD) {
lowFreq.setMinimumNumberShouldMatch(minNrShouldMatch);
}
BooleanQuery query = new BooleanQuery(true);
for (int i = 0; i < queryTerms.length; i++) {
TermContext termContext = contextArray[i];
@ -186,6 +190,11 @@ public class CommonTermsQuery extends Query {
}
}
final int numLowFreqClauses = lowFreq.clauses().size();
if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
lowFreq.setMinimumNumberShouldMatch(minMustMatch);
}
if (lowFreq.clauses().isEmpty()) {
/*
* if lowFreq is empty we rewrite the high freq terms in a conjunction to
@ -265,7 +274,9 @@ public class CommonTermsQuery extends Query {
/**
* Specifies a minimum number of the optional BooleanClauses which must be
* satisfied in order to produce a match on the low frequency terms query
* part.
* part. This method accepts a float value in the range [0..1) as a fraction
* of the actual query terms in the low frequency clause or a number
* <tt>&gt;=1</tt> as an absolute number of clauses that need to match.
*
* <p>
* By default no optional clauses are necessary for a match (unless there are
@ -276,7 +287,7 @@ public class CommonTermsQuery extends Query {
* @param min
* the number of optional clauses that must match
*/
public void setMinimumNumberShouldMatch(int min) {
public void setMinimumNumberShouldMatch(float min) {
this.minNrShouldMatch = min;
}
@ -284,7 +295,7 @@ public class CommonTermsQuery extends Query {
* Gets the minimum number of the optional BooleanClauses which must be
* satisfied.
*/
public int getMinimumNumberShouldMatch() {
public float getMinimumNumberShouldMatch() {
return minNrShouldMatch;
}
@ -332,7 +343,7 @@ public class CommonTermsQuery extends Query {
result = prime * result
+ ((lowFreqOccur == null) ? 0 : lowFreqOccur.hashCode());
result = prime * result + Float.floatToIntBits(maxTermFrequency);
result = prime * result + minNrShouldMatch;
result = prime * result + Float.floatToIntBits(minNrShouldMatch);
result = prime * result + ((terms == null) ? 0 : terms.hashCode());
return result;
}
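
With the int-to-float change, setMinimumNumberShouldMatch() covers both interpretations; a short usage sketch (terms illustrative):

CommonTermsQuery ctq = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD, 0.5f);
ctq.add(new Term("field", "the"));
ctq.add(new Term("field", "quick"));
ctq.add(new Term("field", "fox"));
ctq.setMinimumNumberShouldMatch(0.5f); // fraction: Math.round(0.5f * numLowFreqClauses)
ctq.setMinimumNumberShouldMatch(2.0f); // >= 1: absolute number of clauses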

View File

@ -175,6 +175,90 @@ public class CommonTermsQueryTest extends LuceneTestCase {
}
}
public void testMinShouldMatch() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
"there is the famous restaurant at the end of the universe",};
for (int i = 0; i < docs.length; i++) {
Document doc = new Document();
doc.add(newStringField("id", "" + i, Field.Store.YES));
doc.add(newTextField("field", docs[i], Field.Store.NO));
w.addDocument(doc);
}
IndexReader r = w.getReader();
IndexSearcher s = newSearcher(r);
{
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
random().nextBoolean() ? 2.0f : 0.5f);
query.add(new Term("field", "is"));
query.add(new Term("field", "this"));
query.add(new Term("field", "end"));
query.add(new Term("field", "world"));
query.add(new Term("field", "universe"));
query.add(new Term("field", "right"));
query.setMinimumNumberShouldMatch(0.5f);
TopDocs search = s.search(query, 10);
assertEquals(search.totalHits, 1);
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
random().nextBoolean() ? 2.0f : 0.5f);
query.add(new Term("field", "is"));
query.add(new Term("field", "this"));
query.add(new Term("field", "end"));
query.add(new Term("field", "world"));
query.add(new Term("field", "universe"));
query.add(new Term("field", "right"));
query.setMinimumNumberShouldMatch(2.0f);
TopDocs search = s.search(query, 10);
assertEquals(search.totalHits, 1);
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
random().nextBoolean() ? 2.0f : 0.5f);
query.add(new Term("field", "is"));
query.add(new Term("field", "this"));
query.add(new Term("field", "end"));
query.add(new Term("field", "world"));
query.add(new Term("field", "universe"));
query.add(new Term("field", "right"));
query.setMinimumNumberShouldMatch(0.49f);
TopDocs search = s.search(query, 10);
assertEquals(search.totalHits, 3);
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
}
{
CommonTermsQuery query = new CommonTermsQuery(Occur.SHOULD, Occur.SHOULD,
random().nextBoolean() ? 2.0f : 0.5f);
query.add(new Term("field", "is"));
query.add(new Term("field", "this"));
query.add(new Term("field", "end"));
query.add(new Term("field", "world"));
query.add(new Term("field", "universe"));
query.add(new Term("field", "right"));
query.setMinimumNumberShouldMatch(1.0f);
TopDocs search = s.search(query, 10);
assertEquals(search.totalHits, 3);
assertEquals("0", r.document(search.scoreDocs[0].doc).get("id"));
assertEquals("2", r.document(search.scoreDocs[1].doc).get("id"));
assertEquals("3", r.document(search.scoreDocs[2].doc).get("id"));
}
r.close();
w.close();
dir.close();
}
public void testIllegalOccur() {
Random random = random();

View File

@ -93,6 +93,8 @@ Bug Fixes
* SOLR-3926: Solr should support a better way of finding active sorts (Eirik Lygre via
Erick Erickson)
* SOLR-4342: Fix DataImportHandler stats to be a proper Map (hossman)
Optimizations
----------------------
@ -107,6 +109,12 @@ Optimizations
* SOLR-3915: Color Legend for Cloud UI (steffkes)
* SOLR-4306: Utilize indexInfo=false when gathering core names in UI
(steffkes)
* SOLR-4284: Admin UI - make core list scrollable separate from the rest of
the UI (steffkes)
Other Changes
----------------------

View File

@ -25,6 +25,7 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.common.util.ContentStreamBase;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.ContentStream;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.SystemIdResolver;
@ -247,7 +248,7 @@ public class DataImportHandler extends RequestHandlerBase implements
return super.getStatistics();
DocBuilder.Statistics cumulative = importer.cumulativeStatistics;
NamedList result = new NamedList();
SimpleOrderedMap result = new SimpleOrderedMap();
result.add("Status", importer.getStatus().toString());
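
SimpleOrderedMap is the NamedList subclass that response writers render as a map/object instead of a flat list of name/value pairs, which is what makes the stats a proper map (SOLR-4342). Roughly, in the JSON response writer (output shapes assumed):

// NamedList        ->  ["Status", "IDLE", ...]
// SimpleOrderedMap ->  {"Status": "IDLE", ...}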

View File

@ -310,7 +310,7 @@ make many changes to an index in a batch and then send the
There is also an <span class="codefrag">optimize</span> command that does the
same things as <span class="codefrag">commit</span>, but also forces all index
segments to be merged into a single segment -- this can be very resource
intsenive, but may be worthwhile for improving search speed if your index
intensive, but may be worthwhile for improving search speed if your index
changes very infrequently.
</p>
<p>
@ -411,7 +411,7 @@ and is useful when testing or debugging queries.
<h2 class="boxed">Highlighting</h2>
<div class="section">
<p>
Hit highlighting returns relevent snippets of each returned document, and highlights
Hit highlighting returns relevant snippets of each returned document, and highlights
terms from the query within those context snippets.
</p>
<p>
@ -522,7 +522,7 @@ Try it out at
<p>
The <a href="http://wiki.apache.org/solr/SchemaXml">schema</a> defines
the fields in the index and what type of analysis is applied to them. The current schema your collection is using
may be viewed directly via the <a href="http://localhost:8983/solr/#/collection1/schema">Schema tab</a> in the Admin UI, or explored dynamicly using the <a href="http://localhost:8983/solr/#/collection1/schema-browser">Schema Browser tab</a>.
may be viewed directly via the <a href="http://localhost:8983/solr/#/collection1/schema">Schema tab</a> in the Admin UI, or explored dynamically using the <a href="http://localhost:8983/solr/#/collection1/schema-browser">Schema Browser tab</a>.
</p>
<p>
The best analysis components (tokenization and filtering) for your textual
@@ -616,7 +616,7 @@ Mousing over the section label to the left of the section will display the full
<p>
When both <a href="http://localhost:8983/solr/#/collection1/analysis?analysis.fieldvalue=Canon+Power-Shot+SD500&amp;analysis.query=power+shot+sd-500&amp;analysis.fieldtype=text_en_splitting&amp;verbose_output=0">Index and Query</a>
values are provided, two tables will be displayed side by side showing the
results of each chain. Terms in the Index chain results that are equivilent
results of each chain. Terms in the Index chain results that are equivalent
to the final terms produced by the Query chain will be highlighted.
</p>
<p>

View File

@@ -111,8 +111,12 @@ limitations under the License.
</ul>
<ul id="menu-selector">
</ul>
<div id="core-selector">
<select data-placeholder="Core Selector"></select>
</div>
<div id="core-menu">
<ul></ul>
</div>
</div>
</div>

View File

@@ -172,7 +172,7 @@ ul
#header
{
padding-bottom: 10px;
position: absolute;
position: fixed;
z-index: 42;
}
@@ -340,12 +340,6 @@ ul
width: 100%;
}
#content > pre
{
max-height: 600px;
overflow: auto;
}
#content .block
{
margin-bottom: 10px;

View File

@@ -1,13 +1,13 @@
#menu-wrapper
{
position: absolute;
top: 90px;
position: fixed;
top: 120px;
width: 150px;
}
.has-environment #menu-wrapper
{
top: 130px;
top: 160px;
}
#menu-wrapper a
@@ -18,6 +18,23 @@
text-overflow: ellipsis;
}
#core-selector
{
margin-top: 20px;
padding-right: 10px;
}
#core-selector a
{
padding: 0;
padding-left: 8px;
}
#core-selector select
{
width: 100%;
}
#menu-wrapper .active p
{
background-color: #fafafa;
@@ -121,32 +138,27 @@
display: none;
}
#menu-selector
{
margin-top: 20px;
}
#menu-selector p
#core-menu p
{
border-top: 1px solid #f0f0f0;
}
#menu-selector li:first-child p
#core-menu li:first-child p
{
border-top: 0;
}
#menu-selector p a
#core-menu p a
{
background-image: url( ../../img/ico/status-offline.png );
}
#menu-selector .active p a
#core-menu .active p a
{
background-image: url( ../../img/ico/box.png );
}
#menu-selector ul,
#core-menu ul,
#menu ul
{
display: none;
@@ -154,7 +166,7 @@
padding-bottom: 10px;
}
#menu-selector .active ul,
#core-menu .active ul,
#menu .active ul
{
display: block;
@@ -165,7 +177,7 @@
border-bottom: 0;
}
#menu-selector ul li a,
#core-menu ul li a,
#menu ul li a
{
background-position: 7px 50%;
@@ -175,20 +187,20 @@
padding-left: 26px;
}
#menu-selector ul li:last-child a,
#core-menu ul li:last-child a,
#menu ul li:last-child a
{
border-bottom: 0;
}
#menu-selector ul li a:hover,
#core-menu ul li a:hover,
#menu ul li a:hover
{
background-color: #f0f0f0;
color: #333;
}
#menu-selector ul li.active a,
#core-menu ul li.active a,
#menu ul li.active a
{
background-color: #d0d0d0;
@@ -213,7 +225,7 @@
#menu #cloud.global .rgraph a { background-image: url( ../../img/ico/asterisk.png ); }
#menu #cloud.global .dump a { background-image: url( ../../img/ico/download-cloud.png ); }
#menu-selector .ping.error a
#core-menu .ping.error a
{
background-color: #ffcccc;
@@ -222,17 +234,18 @@
cursor: help;
}
#menu-selector .query a { background-image: url( ../../img/ico/magnifier.png ); }
#menu-selector .schema a { background-image: url( ../../img/ico/table.png ); }
#menu-selector .config a { background-image: url( ../../img/ico/gear.png ); }
#menu-selector .analysis a { background-image: url( ../../img/ico/funnel.png ); }
#menu-selector .schema-browser a { background-image: url( ../../img/ico/book-open-text.png ); }
#menu-selector .replication a { background-image: url( ../../img/ico/node.png ); }
#menu-selector .distribution a { background-image: url( ../../img/ico/node-select.png ); }
#menu-selector .ping a { background-image: url( ../../img/ico/system-monitor.png ); }
#menu-selector .logging a { background-image: url( ../../img/ico/inbox-document-text.png ); }
#menu-selector .plugins a { background-image: url( ../../img/ico/block.png ); }
#menu-selector .dataimport a { background-image: url( ../../img/ico/document-import.png ); }
#core-menu .overview a { background-image: url( ../../img/ico/home.png ); }
#core-menu .query a { background-image: url( ../../img/ico/magnifier.png ); }
#core-menu .schema a { background-image: url( ../../img/ico/table.png ); }
#core-menu .config a { background-image: url( ../../img/ico/gear.png ); }
#core-menu .analysis a { background-image: url( ../../img/ico/funnel.png ); }
#core-menu .schema-browser a { background-image: url( ../../img/ico/book-open-text.png ); }
#core-menu .replication a { background-image: url( ../../img/ico/node.png ); }
#core-menu .distribution a { background-image: url( ../../img/ico/node-select.png ); }
#core-menu .ping a { background-image: url( ../../img/ico/system-monitor.png ); }
#core-menu .logging a { background-image: url( ../../img/ico/inbox-document-text.png ); }
#core-menu .plugins a { background-image: url( ../../img/ico/block.png ); }
#core-menu .dataimport a { background-image: url( ../../img/ico/document-import.png ); }
#content #navigation

View File

@@ -545,6 +545,7 @@
clear: left;
float: left;
margin-left: 2px;
white-space: nowrap;
}
#content #schema-browser #data #field .histogram-holder li:hover dl

BIN
solr/webapp/web/img/ico/home.png Executable file (new binary file, 752 B; not shown)

View File

@@ -92,20 +92,26 @@ var sammy = $.sammy
$( 'li.active', menu_wrapper )
.removeClass( 'active' );
if( this.params.splat )
// global dashboard doesn't have params.splat
if( !this.params.splat )
{
var selector = '~' === this.params.splat[0][0]
? '#' + this.params.splat[0].replace( /^~/, '' ) + '.global'
: '#menu-selector #' + this.params.splat[0].replace( /\./g, '__' );
this.params.splat = [ '~index' ];
}
var active_element = $( selector, menu_wrapper );
if( 0 === active_element.size() )
{
this.app.error( 'There exists no core with name "' + this.params.splat[0] + '"' );
return false;
}
var selector = '~' === this.params.splat[0][0]
? '#' + this.params.splat[0].replace( /^~/, '' ) + '.global'
: '#core-selector #' + this.params.splat[0].replace( /\./g, '__' );
var active_element = $( selector, menu_wrapper );
if( 0 === active_element.size() )
{
this.app.error( 'There exists no core with name "' + this.params.splat[0] + '"' );
return false;
}
if( active_element.hasClass( 'global' ) )
{
active_element
.addClass( 'active' );
@@ -115,10 +121,28 @@ var sammy = $.sammy
.addClass( 'active' );
}
if( !active_element.hasClass( 'global' ) )
$( '#core-selector option[selected]' )
.removeAttr( 'selected' )
.trigger( 'liszt:updated' );
$( '#core-selector .chzn-container > a' )
.addClass( 'chzn-default' );
}
else
{
active_element
.attr( 'selected', 'selected' )
.trigger( 'liszt:updated' );
if( !this.params.splat[1] )
{
this.active_core = active_element;
this.params.splat[1] = 'overview';
}
$( '#core-menu .' + this.params.splat[1] )
.addClass( 'active' );
this.active_core = active_element;
}
}
);
@@ -143,9 +167,10 @@ var solr_admin = function( app_config )
plugin_data = null,
this.menu_element = $( '#menu-selector' );
this.config = config;
this.menu_element = $( '#core-selector select' );
this.core_menu = $( '#core-menu ul' );
this.config = config;
this.timeout = null;
this.core_regex_base = '^#\\/([\\w\\d-\\.]+)';
@@ -197,6 +222,9 @@ var solr_admin = function( app_config )
that.menu_element
.empty();
var core_list = [];
core_list.push( '<option></option>' );
var core_count = 0;
for( var core_name in that.cores_data )
{
@@ -214,32 +242,24 @@
classes.push( 'default' );
}
var core_tpl = '<li id="' + core_name.replace( /\./g, '__' ) + '" '
var core_tpl = '<option '
+ ' id="' + core_name.replace( /\./g, '__' ) + '" '
+ ' class="' + classes.join( ' ' ) + '"'
+ ' data-basepath="' + core_path + '"'
+ ' schema="' + cores.status[core_name]['schema'] + '"'
+ ' config="' + cores.status[core_name]['config'] + '"'
+ '>' + "\n"
+ ' <p><a href="#/' + core_name + '" title="' + core_name + '">' + core_name + '</a></p>' + "\n"
+ ' <ul>' + "\n"
+ ' value="#/' + core_name + '"'
+ ' title="' + core_name + '"'
+ '>'
+ core_name
+ '</option>';
+ ' <li class="ping"><a rel="' + core_path + '/admin/ping"><span>Ping</span></a></li>' + "\n"
+ ' <li class="query"><a href="#/' + core_name + '/query"><span>Query</span></a></li>' + "\n"
+ ' <li class="schema"><a href="#/' + core_name + '/schema"><span>Schema</span></a></li>' + "\n"
+ ' <li class="config"><a href="#/' + core_name + '/config"><span>Config</span></a></li>' + "\n"
+ ' <li class="replication"><a href="#/' + core_name + '/replication"><span>Replication</span></a></li>' + "\n"
+ ' <li class="analysis"><a href="#/' + core_name + '/analysis"><span>Analysis</span></a></li>' + "\n"
+ ' <li class="schema-browser"><a href="#/' + core_name + '/schema-browser"><span>Schema Browser</span></a></li>' + "\n"
+ ' <li class="plugins"><a href="#/' + core_name + '/plugins"><span>Plugins / Stats</span></a></li>' + "\n"
+ ' <li class="dataimport"><a href="#/' + core_name + '/dataimport"><span>Dataimport</span></a></li>' + "\n"
+ ' </ul>' + "\n"
+ '</li>';
that.menu_element
.append( core_tpl );
core_list.push( core_tpl );
}
that.menu_element
.append( core_list.join( "\n" ) );
if( cores.initFailures )
{
var failures = [];
@@ -277,7 +297,7 @@ var solr_admin = function( app_config )
$.ajax
(
{
url : config.solr_path + config.core_admin_path + '?wt=json',
url : config.solr_path + config.core_admin_path + '?wt=json&indexInfo=false',
dataType : 'json',
beforeSend : function( arr, form, options )
{
@@ -288,6 +308,52 @@
{
that.set_cores_data( response );
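// enhance the core <select> with the Chosen plugin: a 'change' navigates to
// the selected core; 'liszt:updated' (re)builds the per-core menu below it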
that.menu_element
.chosen()
.off( 'change' )
.on
(
'change',
function( event )
{
location.href = $( 'option:selected', this ).val();
return false;
}
)
.on
(
'liszt:updated',
function( event )
{
var core_name = $( 'option:selected', this ).text();
if( core_name )
{
that.core_menu
.html
(
'<li class="overview"><a href="#/' + core_name + '"><span>Overview</span></a></li>' + "\n" +
'<li class="ping"><a rel="' + that.config.solr_path + '/' + core_name + '/admin/ping"><span>Ping</span></a></li>' + "\n" +
'<li class="query"><a href="#/' + core_name + '/query"><span>Query</span></a></li>' + "\n" +
'<li class="schema"><a href="#/' + core_name + '/schema"><span>Schema</span></a></li>' + "\n" +
'<li class="config"><a href="#/' + core_name + '/config"><span>Config</span></a></li>' + "\n" +
'<li class="replication"><a href="#/' + core_name + '/replication"><span>Replication</span></a></li>' + "\n" +
'<li class="analysis"><a href="#/' + core_name + '/analysis"><span>Analysis</span></a></li>' + "\n" +
'<li class="schema-browser"><a href="#/' + core_name + '/schema-browser"><span>Schema Browser</span></a></li>' + "\n" +
'<li class="plugins"><a href="#/' + core_name + '/plugins"><span>Plugins / Stats</span></a></li>' + "\n" +
'<li class="dataimport"><a href="#/' + core_name + '/dataimport"><span>Dataimport</span></a></li>' + "\n"
)
.show();
}
else
{
that.core_menu
.hide()
.empty();
}
}
);
for( var core_name in response.status )
{
var core_path = config.solr_path + '/' + core_name;

View File

@@ -20,12 +20,6 @@ sammy.bind
'cores_load_data',
function( event, params )
{
if( app.cores_data )
{
params.callback( app.cores_data );
return true;
}
$.ajax
(
{
@@ -335,7 +329,7 @@ sammy.get
.ajaxForm
(
{
url : app.config.solr_path + app.config.core_admin_path + '?wt=json',
url : app.config.solr_path + app.config.core_admin_path + '?wt=json&indexInfo=false',
dataType : 'json',
beforeSubmit : function( array, form, options )
{

View File

@@ -208,9 +208,6 @@ sammy.get
{
var content_element = $( '#content' );
$( '#menu-wrapper #index' )
.addClass( 'active' );
content_element
.html( '<div id="index"></div>' );

View File

@@ -21,7 +21,7 @@ sammy.get
/^#\/(~java-properties)$/,
function( context )
{
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var content_element = $( '#content' );
content_element

View File

@@ -406,7 +406,7 @@ sammy.get
/^#\/(~logging)$/,
function( context )
{
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
loglevel_path = core_basepath + '/admin/logging';
var content_element = $( '#content' );
@@ -492,7 +492,7 @@ sammy.get
/^#\/(~logging)\/level$/,
function( context )
{
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
loglevel_path = core_basepath + '/admin/logging';
var content_element = $( '#content' );

View File

@@ -15,7 +15,7 @@
limitations under the License.
*/
$( '.ping a', app.menu_element )
$( '.ping a', app.core_menu )
.live
(
'click',

View File

@@ -228,7 +228,7 @@ sammy.bind
var related_select_element = $( '#related select', params.schema_browser_element )
var type = 'index';
var sammy_basepath = '#/' + $( 'p a', params.active_core ).html() + '/schema-browser';
var sammy_basepath = app.core_menu.find( '.active a' ).attr( 'href' );
if( !related_navigation_meta.hasClass( 'done' ) )
{
@@ -640,7 +640,7 @@ sammy.bind
}
related_select_element
.attr( 'rel', '#/' + $( 'p a', params.active_core ).html() + '/schema-browser' )
.attr( 'rel', app.core_menu.find( '.active a' ).attr( 'href' ) )
.append( related_options )
.chosen();

View File

@@ -21,7 +21,7 @@ sammy.get
/^#\/(~threads)$/,
function( context )
{
var core_basepath = $( 'li[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var core_basepath = $( '[data-basepath]', app.menu_element ).attr( 'data-basepath' );
var content_element = $( '#content' );
$.get