Merged /lucene/dev/trunk:r1432062-1433030

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4547@1433035 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2013-01-14 18:54:22 +00:00
commit b6c9791358
115 changed files with 2839 additions and 1117 deletions

View File

@ -197,6 +197,9 @@ def checkAll(dirName):
elif link.find('lucene.apache.org/java/docs/discussion.html') != -1:
# OK
pass
elif link.find('lucene.apache.org/core/discussion.html') != -1:
# OK
pass
elif link.find('lucene.apache.org/solr/mirrors-solr-latest-redir.html') != -1:
# OK
pass

View File

@ -308,7 +308,7 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
artifact = text
artifactURL = subURL
if project == 'solr':
expected = 'apache-solr-%s' % version
expected = 'solr-%s' % version
else:
expected = 'lucene-%s' % version
if not artifact.startswith(expected):
@ -334,9 +334,9 @@ def checkSigs(project, urlString, version, tmpDir, isSigned):
'lucene-%s.tgz' % version,
'lucene-%s.zip' % version]
else:
expected = ['apache-solr-%s-src.tgz' % version,
'apache-solr-%s.tgz' % version,
'apache-solr-%s.zip' % version]
expected = ['solr-%s-src.tgz' % version,
'solr-%s.tgz' % version,
'solr-%s.zip' % version]
actual = [x[0] for x in artifacts]
if expected != actual:
@ -556,9 +556,6 @@ def unpackAndVerify(project, tmpDir, artifact, version):
# make sure it unpacks to proper subdir
l = os.listdir(destDir)
if project == 'solr':
expected = 'apache-%s-%s' % (project, version)
else:
expected = '%s-%s' % (project, version)
if l != [expected]:
raise RuntimeError('unpack produced entries %s; expected only %s' % (l, expected))
@ -956,7 +953,6 @@ def getDistributionsForMavenChecks(tmpDir, version, baseURL):
distributionFiles = defaultdict()
for project in ('lucene', 'solr'):
distribution = '%s-%s.tgz' % (project, version)
if project == 'solr': distribution = 'apache-' + distribution
if not os.path.exists('%s/%s' % (tmpDir, distribution)):
distURL = '%s/%s/%s' % (baseURL, project, distribution)
print(' download %s...' % distribution, end=' ')
@ -1010,8 +1006,6 @@ def checkIdenticalMavenArtifacts(distributionFiles, nonMavenizedDeps, artifacts,
distFilenames = dict()
for file in distributionFiles[project]:
baseName = os.path.basename(file)
if project == 'solr': # Remove 'apache-' prefix to allow comparison to Maven artifacts
baseName = baseName.replace('apache-', '')
distFilenames[baseName] = file
for artifact in artifacts[project]:
if reJarWar.search(artifact):
@ -1348,9 +1342,9 @@ def smokeTest(baseURL, version, tmpDir, isSigned):
print()
print('Test Solr...')
checkSigs('solr', solrPath, version, tmpDir, isSigned)
for artifact in ('apache-solr-%s.tgz' % version, 'apache-solr-%s.zip' % version):
for artifact in ('solr-%s.tgz' % version, 'solr-%s.zip' % version):
unpackAndVerify('solr', tmpDir, artifact, version)
unpackAndVerify('solr', tmpDir, 'apache-solr-%s-src.tgz' % version, version)
unpackAndVerify('solr', tmpDir, 'solr-%s-src.tgz' % version, version)
print()
print('Test Maven artifacts for Lucene and Solr...')

View File

@ -19,6 +19,16 @@ Changes in backwards compatibility policy
(Nikola Tanković, Uwe Schindler, Chris Male, Mike McCandless,
Robert Muir)
* LUCENE-4677, LUCENE-4682: unpacked FSTs now use vInt to encode the node target,
to reduce their size (Mike McCandless)
* LUCENE-4678: FST now uses a paged byte[] structure instead of a
single byte[] internally, to avoid large memory spikes during
building (James Dyer, Mike McCandless)
* LUCENE-3298: FST can now be larger than 2.1 GB / 2.1 B nodes.
(James Dyer, Mike McCandless)
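The three entries above describe the FST work that makes up the bulk of this commit: node targets become vInts, the FST body moves onto a paged BytesStore, and node addresses widen from int to long so the automaton can grow past 2.1 GB. The Builder constructor (changed further down in this diff) gains a trailing bytesPageBits argument selecting the BytesStore page size. Below is a minimal sketch of driving that constructor, with argument values copied from the call sites updated in this commit; the class name is only for illustration.

import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;

public class PagedFstSketch {
  public static void main(String[] args) throws IOException {
    PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
    // Same argument layout as the updated call sites; the trailing 15 means
    // 2^15 = 32768-byte pages inside the new BytesStore.
    Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
        Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true, 15);
    IntsRef scratch = new IntsRef();
    builder.add(Util.toIntsRef(new BytesRef("lucene"), scratch), 42L);
    FST<Long> fst = builder.finish();
    System.out.println("size in bytes: " + fst.sizeInBytes());
  }
}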
======================= Lucene 4.1.0 =======================
Changes in backwards compatibility policy
@ -45,7 +55,7 @@ Changes in backwards compatibility policy
Instead of calling refresh(), you should write similar code to how you reopen
a regular DirectoryReader.
- TaxonomyReader.openIfChanged (previously refresh()) no longer throws
IncosistentTaxonomyException, and supports recreate. InconsistentTaxoEx
InconsistentTaxonomyException, and supports recreate. InconsistentTaxoEx
was removed.
- ChildrenArrays was pulled out of TaxonomyReader into a top-level class.
- TaxonomyReader was made an abstract class (instead of an interface), with
@ -94,7 +104,7 @@ Changes in backwards compatibility policy
Also, the entire IndexingParams chain is now immutable. If you need to override
a setting, you should extend the relevant class.
Additionally, FacetSearchParams is now immutable, and requires all FacetRequests
to speified at initialization time. (Shai Erera)
to specified at initialization time. (Shai Erera)
* LUCENE-4647: CategoryDocumentBuilder and EnhancementsDocumentBuilder are replaced
by FacetFields and AssociationsFacetFields respectively. CategoryEnhancement and
@ -115,6 +125,10 @@ Changes in backwards compatibility policy
result, few other classes such as Aggregator and CategoryListIterator were
changed to handle bulk category ordinals. (Shai Erera)
* LUCENE-4683: CategoryListIterator and Aggregator are now per-segment. As such
their implementations no longer take a top-level IndexReader in the constructor
but rather implement a setNextReader. (Shai Erera)
New Features
* LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
@ -152,11 +166,6 @@ New Features
* LUCENE-4515: MemoryIndex now supports adding the same field multiple
times. (Simon Willnauer)
* LUCENE-4540: Added an experimental Norm.setPackedLong, which allows
the use of VAR_INTS-encoded norms. This can be useful for cases where
you only need a few bits per-document, or where you might want exact
document length, and so on. (Robert Muir)
* LUCENE-4489: Added consumeAllTokens option to LimitTokenCountFilter
(hossman, Robert Muir)
@ -267,7 +276,7 @@ Bug Fixes
allow 1+maxMergeCount merges threads to be created, instead of just
maxMergeCount (Radim Kolar, Mike McCandless)
* LUCENE-4567: Fixed NullPointerException in analzying, fuzzy, and
* LUCENE-4567: Fixed NullPointerException in analyzing, fuzzy, and
WFST suggesters when no suggestions were added (selckin via Mike
McCandless)
@ -527,7 +536,7 @@ API Changes
StoredFieldVisitor API. (Mike McCandless)
* LUCENE-4343: Made Tokenizer.setReader final. This is a setter that should
not be overriden by subclasses: per-stream initialization should happen
not be overridden by subclasses: per-stream initialization should happen
in reset(). (Robert Muir)
* LUCENE-4377: Remove IndexInput.copyBytes(IndexOutput, long).
@ -753,7 +762,7 @@ API Changes
* LUCENE-4273: When pulling a DocsEnum, you can pass an int flags
instead of the previous boolean needsFlags; consistent with the changes
for DocsAndPositionsEnum in LUCENE-4230. Currently othe only flag
for DocsAndPositionsEnum in LUCENE-4230. Currently the only flag
is DocsEnum.FLAG_FREQS. (Robert Muir, Mike McCandless)
* LUCENE-3616: TextField(String, Reader, Store) was reduced to TextField(String, Reader),
@ -825,7 +834,7 @@ Bug Fixes
instance are already checked out and queued up but not yet flushed.
(Simon Willnauer)
* LUCENE-4282: Automaton FuzzyQuery didnt always deliver all results.
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
(Johannes Christen, Uwe Schindler, Robert Muir)
* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
@ -1055,7 +1064,7 @@ Changes in backwards compatibility policy
Query/Weight/Scorer. If you extended Similarity directly before, you should
extend TFIDFSimilarity instead. Similarity is now a lower-level API to
implement other scoring algorithms. See MIGRATE.txt for more details.
(David Nemeskey, Simon Willnauer, Mike Mccandless, Robert Muir)
(David Nemeskey, Simon Willnauer, Mike McCandless, Robert Muir)
* LUCENE-3330: The expert visitor API in Scorer has been simplified and
extended to support arbitrary relationships. To navigate to a scorer's
@ -1163,12 +1172,12 @@ Changes in Runtime Behavior
omitNorms(true) for field "a" for 1000 documents, but then add a document with
omitNorms(false) for field "a", all documents for field "a" will have no
norms. Previously, Lucene would fill the first 1000 documents with
"fake norms" from Similarity.getDefault(). (Robert Muir, Mike Mccandless)
"fake norms" from Similarity.getDefault(). (Robert Muir, Mike McCandless)
* LUCENE-2846: When some documents contain field "a", and others do not, the
documents that don't have the field get a norm byte value of 0. Previously,
Lucene would populate "fake norms" with Similarity.getDefault() for these
documents. (Robert Muir, Mike Mccandless)
documents. (Robert Muir, Mike McCandless)
* LUCENE-2720: IndexWriter throws IndexFormatTooOldException on open, rather
than later when e.g. a merge starts.
@ -1201,13 +1210,13 @@ Changes in Runtime Behavior
update or delete on IndexWriter. By default DWPTs are flushed either on
maxBufferedDocs per DWPT or the global active used memory. Once the active
memory exceeds ramBufferSizeMB only the largest DWPT is selected for
flushing and the memory used by this DWPT is substracted from the active
flushing and the memory used by this DWPT is subtracted from the active
memory and added to a flushing memory pool, which can lead to temporarily
higher memory usage due to ongoing indexing.
- IndexWriter now can utilize ramBufferSize > 2048 MB. Each DWPT can address
up to 2048 MB memory such that the ramBufferSize is now bounded by the max
number of DWPT avaliable in the used DocumentsWriterPerThreadPool.
number of DWPT available in the used DocumentsWriterPerThreadPool.
IndexWriters net memory consumption can grow far beyond the 2048 MB limit if
the application can use all available DWPTs. To prevent a DWPT from
exhausting its address space IndexWriter will forcefully flush a DWPT if its
@ -1215,7 +1224,7 @@ Changes in Runtime Behavior
via IndexWriterConfig and defaults to 1945 MB.
Since IndexWriter flushes DWPT concurrently not all memory is released
immediately. Applications should still use a ramBufferSize significantly
lower than the JVMs avaliable heap memory since under high load multiple
lower than the JVMs available heap memory since under high load multiple
flushing DWPT can consume substantial transient memory when IO performance
is slow relative to indexing rate.
@ -1223,7 +1232,7 @@ Changes in Runtime Behavior
'currently' RAM resident documents to disk. Yet, flushes that occur while a
a full flush is running are queued and will happen after all DWPT involved
in the full flush are done flushing. Applications using multiple threads
during indexing and trigger a full flush (eg call commmit() or open a new
during indexing and trigger a full flush (eg call commit() or open a new
NRT reader) can use significantly more transient memory.
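The flushing behavior described in the entries above is controlled by two IndexWriterConfig knobs: the global ramBufferSizeMB trigger and the per-DWPT hard limit that defaults to 1945 MB. A hedged sketch of setting both follows; the directory path, Version constant and analyzer are placeholders, not part of this change.

import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class FlushConfigSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File("index"));
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40,
        new StandardAnalyzer(Version.LUCENE_40));
    // Global trigger: once active memory across all DWPTs exceeds this,
    // the largest DWPT is selected and flushed.
    iwc.setRAMBufferSizeMB(256.0);
    // Per-DWPT hard limit mentioned above (defaults to 1945 MB); a DWPT is
    // forcefully flushed before its buffer can exceed it.
    iwc.setRAMPerThreadHardLimitMB(1024);
    IndexWriter writer = new IndexWriter(dir, iwc);
    // ... addDocument / updateDocument calls ...
    writer.close();
  }
}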
- IndexWriter#addDocument and IndexWriter.updateDocument can block indexing
@ -1266,7 +1275,7 @@ Changes in Runtime Behavior
* LUCENE-3455: QueryParserBase.newFieldQuery() will throw a ParseException if
any of the calls to the Analyzer throw an IOException. QueryParseBase.analyzeRangePart()
will throw a RuntimException if an IOException is thrown by the Analyzer.
will throw a RuntimeException if an IOException is thrown by the Analyzer.
* LUCENE-4127: IndexWriter will now throw IllegalArgumentException if
the first token of an indexed field has 0 positionIncrement
@ -1356,7 +1365,7 @@ API Changes
customized on a per-field basis. (Robert Muir)
* LUCENE-3308: DuplicateFilter keepMode and processingMode have been converted to
enums DuplicateFilter.KeepMode and DuplicateFilter.ProcessingMode repsectively.
enums DuplicateFilter.KeepMode and DuplicateFilter.ProcessingMode respectively.
* LUCENE-3483: Move Function grouping collectors from Solr to grouping module.
(Martijn van Groningen)
@ -1514,7 +1523,7 @@ New features
* LUCENE-2742: Add native per-field postings format support. Codec lets you now
register a postings format for each field and which is in turn recorded
into the index. Postings formtas are maintained on a per-segment basis and be
into the index. Postings formats are maintained on a per-segment basis and be
resolved without knowing the actual postings format used for writing the segment.
(Simon Willnauer)
@ -1722,7 +1731,7 @@ New features
- o.a.l.analysis.miscellaneous.CapitalizationFilter: A TokenFilter that applies
capitalization rules to tokens.
- o.a.l.analysis.pattern: Package for pattern-based analysis, containing a
CharFilter, Tokenizer, and Tokenfilter for transforming text with regexes.
CharFilter, Tokenizer, and TokenFilter for transforming text with regexes.
- o.a.l.analysis.synonym.SynonymFilter: A synonym filter that supports multi-word
synonyms.
- o.a.l.analysis.phonetic: Package for phonetic search, containing various
@ -1894,7 +1903,7 @@ Bug fixes
DocsAndPositionsEnum while merging (Marc Sturlese, Erick Erickson,
Robert Muir, Simon Willnauer, Mike McCandless)
* LUCENE-3589: BytesRef copy(short) didnt set length.
* LUCENE-3589: BytesRef copy(short) didn't set length.
(Peter Chang via Robert Muir)
* LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was
@ -1997,6 +2006,51 @@ Build
XSL. (Greg Bowyer, Uwe Schindler)
======================= Lucene 3.6.2 =======================
Bug Fixes
* LUCENE-4234: Exception when FacetsCollector is used with ScoreFacetRequest,
and the number of matching documents is too large. (Gilad Barkai via Shai Erera)
* LUCENE-2686, LUCENE-3505, LUCENE-4401: Fix BooleanQuery scorers to
return correct freq().
(Koji Sekiguchi, Mike McCandless, Liu Chao, Robert Muir)
* LUCENE-2501: Fixed rare thread-safety issue that could cause
ArrayIndexOutOfBoundsException inside ByteBlockPool (Robert Muir,
Mike McCandless)
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
twice for conjunctions: for most users this is no problem, but
if you had a customized Similarity that returned something other
than 1 when overlap == maxOverlap (always the case for conjunctions),
then the score would be incorrect. (Pascal Chollet, Robert Muir)
* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
had a custom Similarity where coord(1,1) != 1F, then the rewritten
query would be scored differently. (Robert Muir)
* LUCENE-4398: If you index many different field names in your
documents then due to a bug in how it measures its RAM
usage, IndexWriter would flush each segment too early eventually
reaching the point where it flushes after every doc. (Tim Smith via
Mike McCandless)
* LUCENE-4411: when sampling is enabled for a FacetRequest, its depth
parameter is reset to the default (1), even if set otherwise.
(Gilad Barkai via Shai Erera)
* LUCENE-4635: Fixed ArrayIndexOutOfBoundsException when in-memory
terms index requires more than 2.1 GB RAM (indices with billions of
terms). (Tom Burton-West via Mike McCandless)
Documentation
* LUCENE-4302: Fix facet userguide to have HTML loose doctype like
all other javadocs. (Karl Nicholas via Uwe Schindler)
======================= Lucene 3.6.1 =======================
More information about this release, including any errata related to the
release notes, upgrade instructions, or other changes may be found online at:
@ -2043,7 +2097,7 @@ Tests
random graph tokens. (Mike McCandless)
* LUCENE-3968: factor out LookaheadTokenFilter from
MockGraphTokenFilter (Mike Mccandless)
MockGraphTokenFilter (Mike McCandless)
======================= Lucene 3.6.0 =======================
@ -2323,7 +2377,7 @@ Bug fixes
* LUCENE-3876: Fix bug where positions for a document exceeding
Integer.MAX_VALUE/2 would produce a corrupt index.
(Simon Willnauer, Mike Mccandless, Robert Muir)
(Simon Willnauer, Mike McCandless, Robert Muir)
* LUCENE-3880: UAX29URLEmailTokenizer now recognizes emails when the mailto:
scheme is prepended. (Kai Gülzau, Steve Rowe)

View File

@ -19,8 +19,8 @@ package org.apache.lucene.analysis.ja.dict;
import java.io.IOException;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST;
/**
* Thin wrapper around an FST with root-arc caching for Japanese.
@ -48,7 +48,7 @@ public final class TokenInfoFST {
rootCache = cacheRootArcs();
}
@SuppressWarnings("unchecked")
@SuppressWarnings({"rawtypes","unchecked"})
private FST.Arc<Long>[] cacheRootArcs() throws IOException {
FST.Arc<Long> rootCache[] = new FST.Arc[1+(cacheCeiling-0x3040)];
FST.Arc<Long> firstArc = new FST.Arc<Long>();

View File

@ -132,7 +132,7 @@ public class TokenInfoDictionaryBuilder {
System.out.println(" encode...");
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton(true);
Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, true);
Builder<Long> fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, null, true, PackedInts.DEFAULT, true, 15);
IntsRef scratch = new IntsRef();
long ord = -1; // first ord will be 0
String lastValue = null;

View File

@ -113,7 +113,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
this.field = field;
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio, true);
builder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doPackFST, acceptableOverheadRatio, true, 15);
}
private class PostingsWriter extends PostingsConsumer {

View File

@ -230,7 +230,7 @@ and proximity searches (though sentence identification is not provided by Lucene
create, or a combination of existing and newly created components. Before
pursuing this approach, you may find it worthwhile to explore the
<a href="{@docRoot}/../analyzers-common/overview-summary.html">analyzers-common</a> library and/or ask on the
<a href="http://lucene.apache.org/java/docs/mailinglists.html"
<a href="http://lucene.apache.org/core/discussion.html"
>java-user@lucene.apache.org mailing list</a> first to see if what you
need already exists. If you are still committed to creating your own
Analyzer, have a look at the source code of any one of the many samples
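As a concrete illustration of the custom-Analyzer route the paragraph above describes, here is a minimal sketch assembled from analyzers-common components; the class name and the particular tokenizer/filter choices are arbitrary.

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util.Version;

public final class MyAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // Split on whitespace, then lowercase each token.
    Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_40, reader);
    TokenStream result = new LowerCaseFilter(Version.LUCENE_40, source);
    return new TokenStreamComponents(source, result);
  }
}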

View File

@ -276,13 +276,13 @@ public class BlockTreeTermsReader extends FieldsProducer {
*/
public static class Stats {
/** How many nodes in the index FST. */
public int indexNodeCount;
public long indexNodeCount;
/** How many arcs in the index FST. */
public int indexArcCount;
public long indexArcCount;
/** Byte size of the index. */
public int indexNumBytes;
public long indexNumBytes;
/** Total number of terms in the field. */
public long totalTermCount;

View File

@ -23,7 +23,6 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
@ -41,6 +40,7 @@ import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.NoOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.packed.PackedInts;
/*
TODO:
@ -187,7 +187,7 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
public final static int DEFAULT_MAX_BLOCK_SIZE = 48;
//public final static boolean DEBUG = false;
private final static boolean SAVE_DOT_FILES = false;
//private final static boolean SAVE_DOT_FILES = false;
static final int OUTPUT_FLAGS_NUM_BITS = 2;
static final int OUTPUT_FLAGS_MASK = 0x3;
@ -419,7 +419,8 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> indexBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
outputs, null, false, true);
outputs, null, false,
PackedInts.COMPACT, true, 15);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}
@ -962,7 +963,9 @@ public class BlockTreeTermsWriter extends FieldsConsumer {
0, 0, true,
true, Integer.MAX_VALUE,
noOutputs,
new FindBlocks(), false, true);
new FindBlocks(), false,
PackedInts.COMPACT,
true, 15);
postingsWriter.setField(fieldInfo);
}

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@ -3475,6 +3476,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
diagnostics.put("os.version", Constants.OS_VERSION);
diagnostics.put("java.version", Constants.JAVA_VERSION);
diagnostics.put("java.vendor", Constants.JAVA_VENDOR);
diagnostics.put("timestamp", Long.toString(new Date().getTime()));
if (details != null) {
diagnostics.putAll(details);
}

View File

@ -116,15 +116,6 @@ public final class Norm {
this.field.setLongValue(norm);
}
/**
* Sets a packed long norm value.
* @lucene.experimental
*/
public void setPackedLong(long norm) {
setType(Type.VAR_INTS);
this.field.setLongValue(norm);
}
/**
* Sets a byte norm value
*/

View File

@ -38,7 +38,7 @@ import org.apache.lucene.search.DocIdSetIterator;
public final class FixedBitSet extends DocIdSet implements Bits {
private final long[] bits;
private int numBits;
private final int numBits;
/** returns the number of 64 bit words it would take to hold numBits */
public static int bits2words(int numBits) {

View File

@ -36,9 +36,13 @@ import org.apache.lucene.util.packed.PackedInts;
* <p>NOTE: The algorithm is described at
* http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698</p>
*
* The parameterized type T is the output type. See the
* <p>The parameterized type T is the output type. See the
* subclasses of {@link Outputs}.
*
* <p>FSTs larger than 2.1GB are now possible (as of Lucene
* 4.2). FSTs containing more than 2.1B nodes are also now
* possible, however they cannot be packed.
*
* @lucene.experimental
*/
@ -84,22 +88,11 @@ public class Builder<T> {
/**
* Instantiates an FST/FSA builder without any pruning. A shortcut
* to {@link #Builder(FST.INPUT_TYPE, int, int, boolean,
* boolean, int, Outputs, FreezeTail, boolean, boolean)} with
* pruning options turned off.
* boolean, int, Outputs, FreezeTail, boolean, float,
* boolean, int)} with pruning options turned off.
*/
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true);
}
/**
* Instantiates an FST/FSA builder with {@link PackedInts#DEFAULT}
* <code>acceptableOverheadRatio</code>.
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
FreezeTail<T> freezeTail, boolean willPackFST, boolean allowArrayArcs) {
this(inputType, minSuffixCount1, minSuffixCount2, doShareSuffix, doShareNonSingletonNodes,
shareMaxTailLength, outputs, freezeTail, willPackFST, PackedInts.DEFAULT, allowArrayArcs);
this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false, PackedInts.COMPACT, true, 15);
}
/**
@ -147,10 +140,16 @@ public class Builder<T> {
* @param allowArrayArcs Pass false to disable the array arc optimization
* while building the FST; this will make the resulting
* FST smaller but slower to traverse.
*
* @param bytesPageBits How many bits wide to make each
* byte[] block in the BytesStore; if you know the FST
* will be large then make this larger. For example 15
* bits = 32768 byte pages.
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs) {
FreezeTail<T> freezeTail, boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs,
int bytesPageBits) {
this.minSuffixCount1 = minSuffixCount1;
this.minSuffixCount2 = minSuffixCount2;
this.freezeTail = freezeTail;
@ -158,9 +157,9 @@ public class Builder<T> {
this.shareMaxTailLength = shareMaxTailLength;
this.doPackFST = doPackFST;
this.acceptableOverheadRatio = acceptableOverheadRatio;
fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs);
fst = new FST<T>(inputType, outputs, doPackFST, acceptableOverheadRatio, allowArrayArcs, bytesPageBits);
if (doShareSuffix) {
dedupHash = new NodeHash<T>(fst);
dedupHash = new NodeHash<T>(fst, fst.bytes.getReverseReader(false));
} else {
dedupHash = null;
}
@ -174,7 +173,7 @@ public class Builder<T> {
}
}
public int getTotStateCount() {
public long getTotStateCount() {
return fst.nodeCount;
}
@ -182,12 +181,12 @@ public class Builder<T> {
return frontier[0].inputCount;
}
public int getMappedStateCount() {
public long getMappedStateCount() {
return dedupHash == null ? 0 : fst.nodeCount;
}
private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throws IOException {
final int node;
final long node;
if (dedupHash != null && (doShareNonSingletonNodes || nodeIn.numArcs <= 1) && tailLength <= shareMaxTailLength) {
if (nodeIn.numArcs == 0) {
node = fst.addNode(nodeIn);
@ -475,7 +474,7 @@ public class Builder<T> {
fst.finish(compileNode(root, lastInput.length).node);
if (doPackFST) {
return fst.pack(3, Math.max(10, fst.getNodeCount()/4), acceptableOverheadRatio);
return fst.pack(3, Math.max(10, (int) (fst.getNodeCount()/4)), acceptableOverheadRatio);
} else {
return fst;
}
@ -513,8 +512,12 @@ public class Builder<T> {
boolean isCompiled();
}
public long fstSizeInBytes() {
return fst.sizeInBytes();
}
static final class CompiledNode implements Node {
int node;
long node;
@Override
public boolean isCompiled() {
return true;

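Alongside the constructor change, the Builder accessors above are widened to long (getTotStateCount, getMappedStateCount) and a fstSizeInBytes() method is added, which is what lets very large builds be monitored; compare Test2BFST later in this commit. A small self-contained sketch, with an illustrative helper name:

import org.apache.lucene.util.fst.Builder;

public class BuilderProgressSketch {
  // Print progress every millionth term using the long-valued accessors
  // introduced in this change.
  static void maybeReport(Builder<?> builder, long termsAdded) {
    if (termsAdded % 1000000 == 0) {
      System.out.println(termsAdded + " terms; "
          + builder.getTotStateCount() + " nodes; "
          + builder.fstSizeInBytes() + " bytes");
    }
  }
}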
View File

@ -0,0 +1,468 @@
package org.apache.lucene.util.fst;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
// TODO: merge with PagedBytes, except PagedBytes doesn't
// let you read while writing which FST needs
class BytesStore extends DataOutput {
private final List<byte[]> blocks = new ArrayList<byte[]>();
private final int blockSize;
private final int blockBits;
private final int blockMask;
private byte[] current;
private int nextWrite;
public BytesStore(int blockBits) {
this.blockBits = blockBits;
blockSize = 1 << blockBits;
blockMask = blockSize-1;
nextWrite = blockSize;
}
/** Pulls bytes from the provided IndexInput. */
public BytesStore(DataInput in, int numBytes, int maxBlockSize) throws IOException {
int blockSize = 2;
int blockBits = 1;
while(blockSize < numBytes && blockSize < maxBlockSize) {
blockSize *= 2;
blockBits++;
}
this.blockBits = blockBits;
this.blockSize = blockSize;
this.blockMask = blockSize-1;
int left = numBytes;
while(left > 0) {
final int chunk = Math.min(blockSize, left);
byte[] block = new byte[chunk];
in.readBytes(block, 0, block.length);
blocks.add(block);
left -= chunk;
}
// So .getPosition still works
nextWrite = blocks.get(blocks.size()-1).length;
}
/** Absolute write byte; you must ensure dest is < max
* position written so far. */
public void writeByte(int dest, byte b) {
int blockIndex = dest >> blockBits;
byte[] block = blocks.get(blockIndex);
block[dest & blockMask] = b;
}
@Override
public void writeByte(byte b) {
if (nextWrite == blockSize) {
current = new byte[blockSize];
blocks.add(current);
nextWrite = 0;
}
current[nextWrite++] = b;
}
@Override
public void writeBytes(byte[] b, int offset, int len) {
while (len > 0) {
int chunk = blockSize - nextWrite;
if (len <= chunk) {
System.arraycopy(b, offset, current, nextWrite, len);
nextWrite += len;
break;
} else {
if (chunk > 0) {
System.arraycopy(b, offset, current, nextWrite, chunk);
offset += chunk;
len -= chunk;
}
current = new byte[blockSize];
blocks.add(current);
nextWrite = 0;
}
}
}
int getBlockBits() {
return blockBits;
}
/** Absolute writeBytes without changing the current
* position. Note: this cannot "grow" the bytes, so you
* must only call it on already written parts. */
void writeBytes(long dest, byte[] b, int offset, int len) {
//System.out.println(" BS.writeBytes dest=" + dest + " offset=" + offset + " len=" + len);
assert dest + len <= getPosition(): "dest=" + dest + " pos=" + getPosition() + " len=" + len;
// Note: weird: must go "backwards" because copyBytes
// calls us with overlapping src/dest. If we
// go forwards then we overwrite bytes before we can
// copy them:
/*
int blockIndex = dest >> blockBits;
int upto = dest & blockMask;
byte[] block = blocks.get(blockIndex);
while (len > 0) {
int chunk = blockSize - upto;
System.out.println(" cycle chunk=" + chunk + " len=" + len);
if (len <= chunk) {
System.arraycopy(b, offset, block, upto, len);
break;
} else {
System.arraycopy(b, offset, block, upto, chunk);
offset += chunk;
len -= chunk;
blockIndex++;
block = blocks.get(blockIndex);
upto = 0;
}
}
*/
final long end = dest + len;
int blockIndex = (int) (end >> blockBits);
int downTo = (int) (end & blockMask);
if (downTo == 0) {
blockIndex--;
downTo = blockSize;
}
byte[] block = blocks.get(blockIndex);
while (len > 0) {
//System.out.println(" cycle downTo=" + downTo + " len=" + len);
if (len <= downTo) {
//System.out.println(" final: offset=" + offset + " len=" + len + " dest=" + (downTo-len));
System.arraycopy(b, offset, block, downTo-len, len);
break;
} else {
len -= downTo;
//System.out.println(" partial: offset=" + (offset + len) + " len=" + downTo + " dest=0");
System.arraycopy(b, offset + len, block, 0, downTo);
blockIndex--;
block = blocks.get(blockIndex);
downTo = blockSize;
}
}
}
/** Absolute copy bytes self to self, without changing the
* position. Note: this cannot "grow" the bytes, so must
* only call it on already written parts. */
public void copyBytes(long src, long dest, int len) {
//System.out.println("BS.copyBytes src=" + src + " dest=" + dest + " len=" + len);
assert src < dest;
// Note: weird: must go "backwards" because copyBytes
// calls us with overlapping src/dest. If we
// go forwards then we overwrite bytes before we can
// copy them:
/*
int blockIndex = src >> blockBits;
int upto = src & blockMask;
byte[] block = blocks.get(blockIndex);
while (len > 0) {
int chunk = blockSize - upto;
System.out.println(" cycle: chunk=" + chunk + " len=" + len);
if (len <= chunk) {
writeBytes(dest, block, upto, len);
break;
} else {
writeBytes(dest, block, upto, chunk);
blockIndex++;
block = blocks.get(blockIndex);
upto = 0;
len -= chunk;
dest += chunk;
}
}
*/
long end = src + len;
int blockIndex = (int) (end >> blockBits);
int downTo = (int) (end & blockMask);
if (downTo == 0) {
blockIndex--;
downTo = blockSize;
}
byte[] block = blocks.get(blockIndex);
while (len > 0) {
//System.out.println(" cycle downTo=" + downTo);
if (len <= downTo) {
//System.out.println(" finish");
writeBytes(dest, block, downTo-len, len);
break;
} else {
//System.out.println(" partial");
len -= downTo;
writeBytes(dest + len, block, 0, downTo);
blockIndex--;
block = blocks.get(blockIndex);
downTo = blockSize;
}
}
}
/** Writes an int at the absolute position without
* changing the current pointer. */
public void writeInt(long pos, int value) {
int blockIndex = (int) (pos >> blockBits);
int upto = (int) (pos & blockMask);
byte[] block = blocks.get(blockIndex);
int shift = 24;
for(int i=0;i<4;i++) {
block[upto++] = (byte) (value >> shift);
shift -= 8;
if (upto == blockSize) {
upto = 0;
blockIndex++;
block = blocks.get(blockIndex);
}
}
}
/** Reverse from srcPos, inclusive, to destPos, inclusive. */
public void reverse(long srcPos, long destPos) {
assert srcPos < destPos;
assert destPos < getPosition();
//System.out.println("reverse src=" + srcPos + " dest=" + destPos);
int srcBlockIndex = (int) (srcPos >> blockBits);
int src = (int) (srcPos & blockMask);
byte[] srcBlock = blocks.get(srcBlockIndex);
int destBlockIndex = (int) (destPos >> blockBits);
int dest = (int) (destPos & blockMask);
byte[] destBlock = blocks.get(destBlockIndex);
//System.out.println(" srcBlock=" + srcBlockIndex + " destBlock=" + destBlockIndex);
int limit = (int) (destPos - srcPos + 1)/2;
for(int i=0;i<limit;i++) {
//System.out.println(" cycle src=" + src + " dest=" + dest);
byte b = srcBlock[src];
srcBlock[src] = destBlock[dest];
destBlock[dest] = b;
src++;
if (src == blockSize) {
srcBlockIndex++;
srcBlock = blocks.get(srcBlockIndex);
//System.out.println(" set destBlock=" + destBlock + " srcBlock=" + srcBlock);
src = 0;
}
dest--;
if (dest == -1) {
destBlockIndex--;
destBlock = blocks.get(destBlockIndex);
//System.out.println(" set destBlock=" + destBlock + " srcBlock=" + srcBlock);
dest = blockSize-1;
}
}
}
public void skipBytes(int len) {
while (len > 0) {
int chunk = blockSize - nextWrite;
if (len <= chunk) {
nextWrite += len;
break;
} else {
len -= chunk;
current = new byte[blockSize];
blocks.add(current);
nextWrite = 0;
}
}
}
public long getPosition() {
return ((long) blocks.size()-1) * blockSize + nextWrite;
}
/** Pos must be less than the max position written so far!
* Ie, you cannot "grow" the file with this! */
public void truncate(long newLen) {
assert newLen <= getPosition();
assert newLen >= 0;
int blockIndex = (int) (newLen >> blockBits);
nextWrite = (int) (newLen & blockMask);
if (nextWrite == 0) {
blockIndex--;
nextWrite = blockSize;
}
blocks.subList(blockIndex+1, blocks.size()).clear();
if (newLen == 0) {
current = null;
} else {
current = blocks.get(blockIndex);
}
assert newLen == getPosition();
}
public void finish() {
if (current != null) {
byte[] lastBuffer = new byte[nextWrite];
System.arraycopy(current, 0, lastBuffer, 0, nextWrite);
blocks.set(blocks.size()-1, lastBuffer);
current = null;
}
}
/** Writes all of our bytes to the target {@link DataOutput}. */
public void writeTo(DataOutput out) throws IOException {
for(byte[] block : blocks) {
out.writeBytes(block, 0, block.length);
}
}
public FST.BytesReader getForwardReader() {
if (blocks.size() == 1) {
return new ForwardBytesReader(blocks.get(0));
}
return new FST.BytesReader() {
private byte[] current;
private int nextBuffer;
private int nextRead = blockSize;
@Override
public byte readByte() {
if (nextRead == blockSize) {
current = blocks.get(nextBuffer++);
nextRead = 0;
}
return current[nextRead++];
}
@Override
public void skipBytes(int count) {
setPosition(getPosition() + count);
}
@Override
public void readBytes(byte[] b, int offset, int len) {
while(len > 0) {
int chunkLeft = blockSize - nextRead;
if (len <= chunkLeft) {
System.arraycopy(current, nextRead, b, offset, len);
nextRead += len;
break;
} else {
if (chunkLeft > 0) {
System.arraycopy(current, nextRead, b, offset, chunkLeft);
offset += chunkLeft;
len -= chunkLeft;
}
current = blocks.get(nextBuffer++);
nextRead = 0;
}
}
}
@Override
public long getPosition() {
return ((long) nextBuffer-1)*blockSize + nextRead;
}
@Override
public void setPosition(long pos) {
int bufferIndex = (int) (pos >> blockBits);
nextBuffer = bufferIndex+1;
current = blocks.get(bufferIndex);
nextRead = (int) (pos & blockMask);
assert getPosition() == pos;
}
@Override
public boolean reversed() {
return false;
}
};
}
public FST.BytesReader getReverseReader() {
return getReverseReader(true);
}
FST.BytesReader getReverseReader(boolean allowSingle) {
if (allowSingle && blocks.size() == 1) {
return new ReverseBytesReader(blocks.get(0));
}
return new FST.BytesReader() {
private byte[] current = blocks.size() == 0 ? null : blocks.get(0);
private int nextBuffer = -1;
private int nextRead = 0;
@Override
public byte readByte() {
if (nextRead == -1) {
current = blocks.get(nextBuffer--);
nextRead = blockSize-1;
}
return current[nextRead--];
}
@Override
public void skipBytes(int count) {
setPosition(getPosition() - count);
}
@Override
public void readBytes(byte[] b, int offset, int len) {
for(int i=0;i<len;i++) {
b[offset+i] = readByte();
}
}
@Override
public long getPosition() {
return ((long) nextBuffer+1)*blockSize + nextRead;
}
@Override
public void setPosition(long pos) {
// NOTE: a little weird because if you
// setPosition(0), the next byte you read is
// bytes[0] ... but I would expect bytes[-1] (ie,
// EOF)...?
int bufferIndex = (int) (pos >> blockBits);
nextBuffer = bufferIndex-1;
current = blocks.get(bufferIndex);
nextRead = (int) (pos & blockMask);
assert getPosition() == pos: "pos=" + pos + " getPos()=" + getPosition();
}
@Override
public boolean reversed() {
return true;
}
};
}
}
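BytesStore's addressing above is plain power-of-two paging: an absolute position splits into a block index (pos >> blockBits) and an offset within that block (pos & blockMask). A tiny standalone illustration of that arithmetic; none of these names are part of the BytesStore API.

public class PagingArithmeticSketch {
  public static void main(String[] args) {
    int blockBits = 15;                         // page-size setting used by the new Builder paths
    int blockSize = 1 << blockBits;             // 32768-byte pages
    int blockMask = blockSize - 1;

    long pos = 100000L;                         // an absolute position in the store
    int blockIndex = (int) (pos >> blockBits);  // which byte[] page: 3
    int offset = (int) (pos & blockMask);       // offset inside that page: 1696

    System.out.println("block " + blockIndex + ", offset " + offset
        + " (check: " + (blockIndex * (long) blockSize + offset) + ")");
  }
}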

File diff suppressed because it is too large.

View File

@ -17,11 +17,11 @@ package org.apache.lucene.util.fst;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
/** Can next() and advance() through the terms in an FST
*
* @lucene.experimental
@ -153,8 +153,8 @@ abstract class FSTEnum<T> {
boolean found = false;
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc*mid+1);
in.setPosition(arc.posArcsStart);
in.skipBytes(arc.bytesPerArc*mid+1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - targetLabel;
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);
@ -292,8 +292,8 @@ abstract class FSTEnum<T> {
boolean found = false;
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc*mid+1);
in.setPosition(arc.posArcsStart);
in.skipBytes(arc.bytesPerArc*mid+1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - targetLabel;
//System.out.println(" cycle low=" + low + " high=" + high + " mid=" + mid + " midLabel=" + midLabel + " cmp=" + cmp);

View File

@ -0,0 +1,62 @@
package org.apache.lucene.util.fst;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// TODO: can we use just ByteArrayDataInput...? need to
// add a .skipBytes to DataInput.. hmm and .setPosition
/** Reads from a single byte[]. */
final class ForwardBytesReader extends FST.BytesReader {
private final byte[] bytes;
private int pos;
public ForwardBytesReader(byte[] bytes) {
this.bytes = bytes;
}
@Override
public byte readByte() {
return bytes[pos++];
}
@Override
public void readBytes(byte[] b, int offset, int len) {
System.arraycopy(bytes, pos, b, offset, len);
pos += len;
}
@Override
public void skipBytes(int count) {
pos += count;
}
@Override
public long getPosition() {
return pos;
}
@Override
public void setPosition(long pos) {
this.pos = (int) pos;
}
@Override
public boolean reversed() {
return false;
}
}

View File

@ -19,22 +19,27 @@ package org.apache.lucene.util.fst;
import java.io.IOException;
import org.apache.lucene.util.packed.GrowableWriter;
import org.apache.lucene.util.packed.PackedInts;
// Used to dedup states (lookup already-frozen states)
final class NodeHash<T> {
private int[] table;
private GrowableWriter table;
private int count;
private int mask;
private final FST<T> fst;
private final FST.Arc<T> scratchArc = new FST.Arc<T>();
private final FST.BytesReader in;
public NodeHash(FST<T> fst) {
table = new int[16];
public NodeHash(FST<T> fst, FST.BytesReader in) {
table = new GrowableWriter(8, 16, PackedInts.COMPACT);
mask = 15;
this.fst = fst;
this.in = in;
}
private boolean nodesEqual(Builder.UnCompiledNode<T> node, int address, FST.BytesReader in) throws IOException {
private boolean nodesEqual(Builder.UnCompiledNode<T> node, long address) throws IOException {
fst.readFirstRealTargetArc(address, scratchArc, in);
if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
return false;
@ -73,7 +78,8 @@ final class NodeHash<T> {
final Builder.Arc<T> arc = node.arcs[arcIdx];
//System.out.println(" label=" + arc.label + " target=" + ((Builder.CompiledNode) arc.target).node + " h=" + h + " output=" + fst.outputs.outputToString(arc.output) + " isFinal?=" + arc.isFinal);
h = PRIME * h + arc.label;
h = PRIME * h + ((Builder.CompiledNode) arc.target).node;
long n = ((Builder.CompiledNode) arc.target).node;
h = PRIME * h + (int) (n^(n>>32));
h = PRIME * h + arc.output.hashCode();
h = PRIME * h + arc.nextFinalOutput.hashCode();
if (arc.isFinal) {
@ -85,16 +91,15 @@ final class NodeHash<T> {
}
// hash code for a frozen node
private int hash(int node) throws IOException {
private int hash(long node) throws IOException {
final int PRIME = 31;
final FST.BytesReader in = fst.getBytesReader(0);
//System.out.println("hash frozen node=" + node);
int h = 0;
fst.readFirstRealTargetArc(node, scratchArc, in);
while(true) {
//System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal());
//System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal() + " pos=" + in.getPosition());
h = PRIME * h + scratchArc.label;
h = PRIME * h + scratchArc.target;
h = PRIME * h + (int) (scratchArc.target^(scratchArc.target>>32));
h = PRIME * h + scratchArc.output.hashCode();
h = PRIME * h + scratchArc.nextFinalOutput.hashCode();
if (scratchArc.isFinal()) {
@ -109,26 +114,25 @@ final class NodeHash<T> {
return h & Integer.MAX_VALUE;
}
public int add(Builder.UnCompiledNode<T> nodeIn) throws IOException {
// System.out.println("hash: add count=" + count + " vs " + table.length);
final FST.BytesReader in = fst.getBytesReader(0);
public long add(Builder.UnCompiledNode<T> nodeIn) throws IOException {
// System.out.println("hash: add count=" + count + " vs " + table.size());
final int h = hash(nodeIn);
int pos = h & mask;
int c = 0;
while(true) {
final int v = table[pos];
final long v = table.get(pos);
if (v == 0) {
// freeze & add
final int node = fst.addNode(nodeIn);
final long node = fst.addNode(nodeIn);
//System.out.println(" now freeze node=" + node);
assert hash(node) == h : "frozenHash=" + hash(node) + " vs h=" + h;
count++;
table[pos] = node;
if (table.length < 2*count) {
table.set(pos, node);
if (table.size() < 2*count) {
rehash();
}
return node;
} else if (nodesEqual(nodeIn, v, in)) {
} else if (nodesEqual(nodeIn, v)) {
// same node is already here
return v;
}
@ -139,12 +143,12 @@ final class NodeHash<T> {
}
// called only by rehash
private void addNew(int address) throws IOException {
private void addNew(long address) throws IOException {
int pos = hash(address) & mask;
int c = 0;
while(true) {
if (table[pos] == 0) {
table[pos] = address;
if (table.get(pos) == 0) {
table.set(pos, address);
break;
}
@ -154,16 +158,16 @@ final class NodeHash<T> {
}
private void rehash() throws IOException {
final int[] oldTable = table;
final GrowableWriter oldTable = table;
if (oldTable.length >= Integer.MAX_VALUE/2) {
if (oldTable.size() >= Integer.MAX_VALUE/2) {
throw new IllegalStateException("FST too large (> 2.1 GB)");
}
table = new int[2*table.length];
mask = table.length-1;
for(int idx=0;idx<oldTable.length;idx++) {
final int address = oldTable[idx];
table = new GrowableWriter(oldTable.getBitsPerValue(), 2*oldTable.size(), PackedInts.COMPACT);
mask = table.size()-1;
for(int idx=0;idx<oldTable.size();idx++) {
final long address = oldTable.get(idx);
if (address != 0) {
addNew(address);
}

View File

@ -0,0 +1,61 @@
package org.apache.lucene.util.fst;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Reads in reverse from a single byte[]. */
final class ReverseBytesReader extends FST.BytesReader {
private final byte[] bytes;
private int pos;
public ReverseBytesReader(byte[] bytes) {
this.bytes = bytes;
}
@Override
public byte readByte() {
return bytes[pos--];
}
@Override
public void readBytes(byte[] b, int offset, int len) {
for(int i=0;i<len;i++) {
b[offset+i] = bytes[pos--];
}
}
@Override
public void skipBytes(int count) {
pos -= count;
}
@Override
public long getPosition() {
return pos;
}
@Override
public void setPosition(long pos) {
this.pos = (int) pos;
}
@Override
public boolean reversed() {
return true;
}
}

View File

@ -39,7 +39,7 @@ public final class Util {
// TODO: would be nice not to alloc this on every lookup
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
final FST.BytesReader fstReader = fst.getBytesReader(0);
final BytesReader fstReader = fst.getBytesReader(0);
// Accumulate output as we go
T output = fst.outputs.getNoOutput();
@ -64,7 +64,7 @@ public final class Util {
public static<T> T get(FST<T> fst, BytesRef input) throws IOException {
assert fst.inputType == FST.INPUT_TYPE.BYTE1;
final FST.BytesReader fstReader = fst.getBytesReader(0);
final BytesReader fstReader = fst.getBytesReader(0);
// TODO: would be nice not to alloc this on every lookup
final FST.Arc<T> arc = fst.getFirstArc(new FST.Arc<T>());
@ -101,7 +101,7 @@ public final class Util {
* fit this. */
public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException {
final FST.BytesReader in = fst.getBytesReader(0);
final BytesReader in = fst.getBytesReader(0);
// TODO: would be nice not to alloc this on every lookup
FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>());
@ -147,8 +147,8 @@ public final class Util {
boolean exact = false;
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc*mid);
in.setPosition(arc.posArcsStart);
in.skipBytes(arc.bytesPerArc*mid);
final byte flags = in.readByte();
fst.readLabel(in);
final long minArcOutput;
@ -273,7 +273,7 @@ public final class Util {
public static class TopNSearcher<T> {
private final FST<T> fst;
private final FST.BytesReader bytesReader;
private final BytesReader bytesReader;
private final int topN;
private final int maxQueueDepth;
@ -374,7 +374,7 @@ public final class Util {
//System.out.println("search topN=" + topN);
final FST.BytesReader fstReader = fst.getBytesReader(0);
final BytesReader fstReader = fst.getBytesReader(0);
final T NO_OUTPUT = fst.outputs.getNoOutput();
// TODO: we could enable FST to sorting arcs by weight
@ -544,7 +544,9 @@ public final class Util {
* </pre>
*
* <p>
* Note: larger FSTs (a few thousand nodes) won't even render, don't bother.
* Note: larger FSTs (a few thousand nodes) won't even
* render, don't bother. If the FST is > 2.1 GB in size
* then this method will throw strange exceptions.
*
* @param sameRank
* If <code>true</code>, the resulting <code>dot</code> file will try
@ -578,7 +580,7 @@ public final class Util {
// A bitset of already seen states (target offset).
final BitSet seen = new BitSet();
seen.set(startArc.target);
seen.set((int) startArc.target);
// Shape for states.
final String stateShape = "circle";
@ -595,7 +597,7 @@ public final class Util {
emitDotState(out, "initial", "point", "white", "");
final T NO_OUTPUT = fst.outputs.getNoOutput();
final FST.BytesReader r = fst.getBytesReader(0);
final BytesReader r = fst.getBytesReader(0);
// final FST.Arc<T> scratchArc = new FST.Arc<T>();
@ -617,7 +619,7 @@ public final class Util {
finalOutput = null;
}
emitDotState(out, Integer.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
emitDotState(out, Long.toString(startArc.target), isFinal ? finalStateShape : stateShape, stateColor, finalOutput == null ? "" : fst.outputs.outputToString(finalOutput));
}
out.write(" initial -> " + startArc.target + "\n");
@ -638,7 +640,8 @@ public final class Util {
if (FST.targetHasArcs(arc)) {
// scan all target arcs
//System.out.println(" readFirstTarget...");
final int node = arc.target;
final long node = arc.target;
fst.readFirstRealTargetArc(arc.target, arc, r);
@ -648,7 +651,7 @@ public final class Util {
//System.out.println(" cycle arc=" + arc);
// Emit the unseen state and add it to the queue for the next level.
if (arc.target >= 0 && !seen.get(arc.target)) {
if (arc.target >= 0 && !seen.get((int) arc.target)) {
/*
boolean isFinal = false;
@ -675,12 +678,12 @@ public final class Util {
finalOutput = "";
}
emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, finalOutput);
emitDotState(out, Long.toString(arc.target), stateShape, stateColor, finalOutput);
// To see the node address, use this instead:
//emitDotState(out, Integer.toString(arc.target), stateShape, stateColor, String.valueOf(arc.target));
seen.set(arc.target);
seen.set((int) arc.target);
nextLevelQueue.add(new FST.Arc<T>().copyFrom(arc));
sameLevelStates.add(arc.target);
sameLevelStates.add((int) arc.target);
}
String outs;
@ -893,8 +896,8 @@ public final class Util {
// " targetLabel=" + targetLabel);
while (low <= high) {
mid = (low + high) >>> 1;
in.pos = arc.posArcsStart;
in.skip(arc.bytesPerArc * mid + 1);
in.setPosition(arc.posArcsStart);
in.skipBytes(arc.bytesPerArc * mid + 1);
final int midLabel = fst.readLabel(in);
final int cmp = midLabel - label;
// System.out.println(" cycle low=" + low + " high=" + high + " mid=" +

View File

@ -115,6 +115,38 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
}
*/
/*
public void testCreateMoreTermsIndex() throws Exception {
// we use a real directory name that is not cleaned up,
// because this method is only used to create backwards
// indexes:
File indexDir = new File("moreterms");
_TestUtil.rmDir(indexDir);
Directory dir = newFSDirectory(indexDir);
LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
mp.setUseCompoundFile(false);
mp.setNoCFSRatio(1.0);
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
// TODO: remove randomness
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
.setMergePolicy(mp);
conf.setCodec(Codec.forName("Lucene40"));
IndexWriter writer = new IndexWriter(dir, conf);
LineFileDocs docs = new LineFileDocs(null, true);
for(int i=0;i<50;i++) {
writer.addDocument(docs.nextDoc());
}
writer.close();
dir.close();
// Gives you time to copy the index out!: (there is also
// a test option to not remove temp dir...):
Thread.sleep(100000);
}
*/
final static String[] oldNames = {"40.cfs",
"40.nocfs",
};
@ -916,4 +948,15 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
dir.close();
}
}
public static final String moreTermsIndex = "moreterms.40.zip";
public void testMoreTerms() throws Exception {
File oldIndexDir = _TestUtil.getTempDir("moreterms");
_TestUtil.unzip(getDataFile(moreTermsIndex), oldIndexDir);
Directory dir = newFSDirectory(oldIndexDir);
// TODO: more tests
_TestUtil.checkIndex(dir);
dir.close();
}
}

View File

@ -22,7 +22,6 @@ import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocValues.Source;
import org.apache.lucene.index.DocValues.Type;
@ -31,12 +30,14 @@ import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.Similarity.ExactSimScorer;
import org.apache.lucene.search.similarities.Similarity.SimWeight;
import org.apache.lucene.search.similarities.Similarity.SloppySimScorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
*
@ -87,39 +88,6 @@ public class TestCustomNorms extends LuceneTestCase {
docs.close();
}
public void testPackedNorms() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
config.setSimilarity(new PackedNormSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
int num = _TestUtil.nextInt(random(), 1, 1000);
for (int i = 0; i < num; i++) {
Document doc = new Document();
doc.add(new StringField("len", Integer.toString(i), Field.Store.YES));
StringBuilder sb = new StringBuilder();
for (int j = 0; j < i; j++) {
sb.append(" token");
}
doc.add(new TextField("content", sb.toString(), Field.Store.NO));
writer.addDocument(doc);
}
DirectoryReader ir = writer.getReader();
writer.close();
for (AtomicReaderContext context : ir.leaves()) {
AtomicReader reader = context.reader();
DocValues norms = reader.normValues("content");
assertNotNull(norms);
Source source = norms.getSource();
assertEquals(Type.VAR_INTS, source.getType());
for (int i = 0; i < reader.maxDoc(); i++) {
assertEquals(source.getInt(i), Long.parseLong(reader.document(i).get("len")));
}
}
ir.close();
dir.close();
}
public void testExceptionOnRandomType() throws IOException {
Directory dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
@ -335,27 +303,4 @@ public class TestCustomNorms extends LuceneTestCase {
}
}
class PackedNormSimilarity extends Similarity {
@Override
public void computeNorm(FieldInvertState state, Norm norm) {
norm.setPackedLong(state.getLength());
}
@Override
public SimWeight computeWeight(float queryBoost, CollectionStatistics collectionStats, TermStatistics... termStats) {
throw new UnsupportedOperationException();
}
@Override
public ExactSimScorer exactSimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
@Override
public SloppySimScorer sloppySimScorer(SimWeight weight, AtomicReaderContext context) throws IOException {
throw new UnsupportedOperationException();
}
}
}

View File

@ -0,0 +1,261 @@
package org.apache.lucene.util.fst;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
import java.util.Random;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TimeUnits;
import org.apache.lucene.util.packed.PackedInts;
import org.junit.Ignore;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
@Ignore("Requires tons of heap to run (10G works)")
@TimeoutSuite(millis = 100 * TimeUnits.HOUR)
public class Test2BFST extends LuceneTestCase {
private static long LIMIT = 3L*1024*1024*1024;
public void test() throws Exception {
int[] ints = new int[7];
IntsRef input = new IntsRef(ints, 0, ints.length);
long seed = random().nextLong();
for(int doPackIter=0;doPackIter<2;doPackIter++) {
boolean doPack = doPackIter == 1;
// Build FST w/ NoOutputs and stop when nodeCount > 3B
if (!doPack) {
System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
Outputs<Object> outputs = NoOutputs.getSingleton();
Object NO_OUTPUT = outputs.getNoOutput();
final Builder<Object> b = new Builder<Object>(FST.INPUT_TYPE.BYTE1, 0, 0, false, false, Integer.MAX_VALUE, outputs,
null, doPack, PackedInts.COMPACT, true, 15);
int count = 0;
Random r = new Random(seed);
int[] ints2 = new int[200];
IntsRef input2 = new IntsRef(ints2, 0, ints2.length);
while(true) {
//System.out.println("add: " + input + " -> " + output);
for(int i=10;i<ints2.length;i++) {
ints2[i] = r.nextInt(256);
}
b.add(input2, NO_OUTPUT);
count++;
if (count % 100000 == 0) {
System.out.println(count + ": " + b.fstSizeInBytes() + " bytes; " + b.getTotStateCount() + " nodes");
}
if (b.getTotStateCount() > LIMIT) {
break;
}
nextInput(r, ints2);
}
FST<Object> fst = b.finish();
System.out.println("\nTEST: now verify [fst size=" + fst.sizeInBytes() + "; nodeCount=" + fst.getNodeCount() + "; arcCount=" + fst.getArcCount() + "]");
Arrays.fill(ints2, 0);
r = new Random(seed);
for(int i=0;i<count;i++) {
if (i % 1000000 == 0) {
System.out.println(i + "...: ");
}
for(int j=10;j<ints2.length;j++) {
ints2[j] = r.nextInt(256);
}
assertEquals(NO_OUTPUT, Util.get(fst, input2));
nextInput(r, ints2);
}
System.out.println("\nTEST: enum all input/outputs");
IntsRefFSTEnum<Object> fstEnum = new IntsRefFSTEnum<Object>(fst);
Arrays.fill(ints2, 0);
r = new Random(seed);
int upto = 0;
while(true) {
IntsRefFSTEnum.InputOutput<Object> pair = fstEnum.next();
if (pair == null) {
break;
}
for(int j=10;j<ints2.length;j++) {
ints2[j] = r.nextInt(256);
}
assertEquals(input2, pair.input);
assertEquals(NO_OUTPUT, pair.output);
upto++;
nextInput(r, ints2);
}
assertEquals(count, upto);
}
// Build FST w/ ByteSequenceOutputs and stop when FST
// size = 3GB
{
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> b = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
null, doPack, PackedInts.COMPACT, true, 15);
byte[] outputBytes = new byte[20];
BytesRef output = new BytesRef(outputBytes);
Arrays.fill(ints, 0);
int count = 0;
Random r = new Random(seed);
while(true) {
r.nextBytes(outputBytes);
//System.out.println("add: " + input + " -> " + output);
b.add(input, BytesRef.deepCopyOf(output));
count++;
if (count % 1000000 == 0) {
System.out.println(count + "...: " + b.fstSizeInBytes() + " bytes");
}
if (b.fstSizeInBytes() > LIMIT) {
break;
}
nextInput(r, ints);
}
FST<BytesRef> fst = b.finish();
System.out.println("\nTEST: now verify [fst size=" + fst.sizeInBytes() + "; nodeCount=" + fst.getNodeCount() + "; arcCount=" + fst.getArcCount() + "]");
r = new Random(seed);
Arrays.fill(ints, 0);
for(int i=0;i<count;i++) {
if (i % 1000000 == 0) {
System.out.println(i + "...: ");
}
r.nextBytes(outputBytes);
assertEquals(output, Util.get(fst, input));
nextInput(r, ints);
}
System.out.println("\nTEST: enum all input/outputs");
IntsRefFSTEnum<BytesRef> fstEnum = new IntsRefFSTEnum<BytesRef>(fst);
Arrays.fill(ints, 0);
r = new Random(seed);
int upto = 0;
while(true) {
IntsRefFSTEnum.InputOutput<BytesRef> pair = fstEnum.next();
if (pair == null) {
break;
}
assertEquals(input, pair.input);
r.nextBytes(outputBytes);
assertEquals(output, pair.output);
upto++;
nextInput(r, ints);
}
assertEquals(count, upto);
}
// Build FST w/ PositiveIntOutputs and stop when FST
// size = 3GB
{
System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
null, doPack, PackedInts.COMPACT, true, 15);
long output = 1;
Arrays.fill(ints, 0);
int count = 0;
Random r = new Random(seed);
while(true) {
//System.out.println("add: " + input + " -> " + output);
b.add(input, output);
output += 1+r.nextInt(10);
count++;
if (count % 1000000 == 0) {
System.out.println(count + "...: " + b.fstSizeInBytes() + " bytes");
}
if (b.fstSizeInBytes() > LIMIT) {
break;
}
nextInput(r, ints);
}
FST<Long> fst = b.finish();
System.out.println("\nTEST: now verify [fst size=" + fst.sizeInBytes() + "; nodeCount=" + fst.getNodeCount() + "; arcCount=" + fst.getArcCount() + "]");
Arrays.fill(ints, 0);
output = 1;
r = new Random(seed);
for(int i=0;i<count;i++) {
if (i % 1000000 == 0) {
System.out.println(i + "...: ");
}
// forward lookup:
assertEquals(output, Util.get(fst, input).longValue());
// reverse lookup:
assertEquals(input, Util.getByOutput(fst, output));
output += 1 + r.nextInt(10);
nextInput(r, ints);
}
System.out.println("\nTEST: enum all input/outputs");
IntsRefFSTEnum<Long> fstEnum = new IntsRefFSTEnum<Long>(fst);
Arrays.fill(ints, 0);
r = new Random(seed);
int upto = 0;
output = 1;
while(true) {
IntsRefFSTEnum.InputOutput<Long> pair = fstEnum.next();
if (pair == null) {
break;
}
assertEquals(input, pair.input);
assertEquals(output, pair.output.longValue());
output += 1 + r.nextInt(10);
upto++;
nextInput(r, ints);
}
assertEquals(count, upto);
}
}
}
private void nextInput(Random r, int[] ints) {
int downTo = 6;
while(downTo >= 0) {
// Must add random amounts (and not just 1) because
// otherwise FST outsmarts us and remains tiny:
ints[downTo] += 1+r.nextInt(10);
if (ints[downTo] < 256) {
break;
} else {
ints[downTo] = 0;
downTo--;
}
}
}
}
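For orientation, here is a minimal sketch of the Builder/Util calls this test exercises at scale, on a tiny input set; the two-argument Builder constructor, the string inputs and the variable names are illustrative assumptions, not part of this change.
// Sketch (assumed usage): build a small FST mapping byte sequences to longs,
// then look one entry up with Util.get(). Inputs must be added in sorted order.
Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
builder.add(Util.toIntsRef(new BytesRef("cat"), new IntsRef()), 5L);
builder.add(Util.toIntsRef(new BytesRef("dog"), new IntsRef()), 7L);
FST<Long> fst = builder.finish();
Long value = Util.get(fst, Util.toIntsRef(new BytesRef("dog"), new IntsRef())); // 7L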
View File
@ -0,0 +1,360 @@
package org.apache.lucene.util.fst;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestBytesStore extends LuceneTestCase {
public void testRandom() throws Exception {
final int iters = atLeast(10);
for(int iter=0;iter<iters;iter++) {
final int numBytes = _TestUtil.nextInt(random(), 1, 200000);
final byte[] expected = new byte[numBytes];
final int blockBits = _TestUtil.nextInt(random(), 8, 15);
final BytesStore bytes = new BytesStore(blockBits);
if (VERBOSE) {
System.out.println("TEST: iter=" + iter + " numBytes=" + numBytes + " blockBits=" + blockBits);
}
int pos = 0;
while(pos < numBytes) {
int op = random().nextInt(8);
if (VERBOSE) {
System.out.println(" cycle pos=" + pos);
}
switch(op) {
case 0:
{
// write random byte
byte b = (byte) random().nextInt(256);
if (VERBOSE) {
System.out.println(" writeByte b=" + b);
}
expected[pos++] = b;
bytes.writeByte(b);
}
break;
case 1:
{
// write random byte[]
int len = random().nextInt(Math.min(numBytes - pos, 100));
byte[] temp = new byte[len];
random().nextBytes(temp);
if (VERBOSE) {
System.out.println(" writeBytes len=" + len + " bytes=" + Arrays.toString(temp));
}
System.arraycopy(temp, 0, expected, pos, temp.length);
bytes.writeBytes(temp, 0, temp.length);
pos += len;
}
break;
case 2:
{
// write int @ absolute pos
if (pos > 4) {
int x = random().nextInt();
int randomPos = random().nextInt(pos-4);
if (VERBOSE) {
System.out.println(" abs writeInt pos=" + randomPos + " x=" + x);
}
bytes.writeInt(randomPos, x);
expected[randomPos++] = (byte) (x >> 24);
expected[randomPos++] = (byte) (x >> 16);
expected[randomPos++] = (byte) (x >> 8);
expected[randomPos++] = (byte) x;
}
}
break;
case 3:
{
// reverse bytes
if (pos > 1) {
int len = _TestUtil.nextInt(random(), 2, Math.min(100, pos));
int start;
if (len == pos) {
start = 0;
} else {
start = random().nextInt(pos - len);
}
int end = start + len - 1;
if (VERBOSE) {
System.out.println(" reverse start=" + start + " end=" + end + " len=" + len + " pos=" + pos);
}
bytes.reverse(start, end);
while(start <= end) {
byte b = expected[end];
expected[end] = expected[start];
expected[start] = b;
start++;
end--;
}
}
}
break;
case 4:
{
// abs write random byte[]
if (pos > 2) {
int randomPos = random().nextInt(pos-1);
int len = _TestUtil.nextInt(random(), 1, Math.min(pos - randomPos - 1, 100));
byte[] temp = new byte[len];
random().nextBytes(temp);
if (VERBOSE) {
System.out.println(" abs writeBytes pos=" + randomPos + " len=" + len + " bytes=" + Arrays.toString(temp));
}
System.arraycopy(temp, 0, expected, randomPos, temp.length);
bytes.writeBytes(randomPos, temp, 0, temp.length);
}
}
break;
case 5:
{
// copyBytes
if (pos > 1) {
int src = random().nextInt(pos-1);
int dest = _TestUtil.nextInt(random(), src+1, pos-1);
int len = _TestUtil.nextInt(random(), 1, Math.min(300, pos - dest));
if (VERBOSE) {
System.out.println(" copyBytes src=" + src + " dest=" + dest + " len=" + len);
}
System.arraycopy(expected, src, expected, dest, len);
bytes.copyBytes(src, dest, len);
}
}
break;
case 6:
{
// skip
int len = random().nextInt(Math.min(100, numBytes - pos));
if (VERBOSE) {
System.out.println(" skip len=" + len);
}
pos += len;
bytes.skipBytes(len);
// NOTE: must fill in zeros in case truncate was
// used, else we get false fails:
if (len > 0) {
byte[] zeros = new byte[len];
bytes.writeBytes(pos-len, zeros, 0, len);
}
}
break;
case 7:
{
// absWriteByte
if (pos > 0) {
int dest = random().nextInt(pos);
byte b = (byte) random().nextInt(256);
expected[dest] = b;
bytes.writeByte(dest, b);
}
break;
}
}
assertEquals(pos, bytes.getPosition());
if (pos > 0 && random().nextInt(50) == 17) {
// truncate
int len = _TestUtil.nextInt(random(), 1, Math.min(pos, 100));
bytes.truncate(pos - len);
pos -= len;
Arrays.fill(expected, pos, pos+len, (byte) 0);
if (VERBOSE) {
System.out.println(" truncate len=" + len + " newPos=" + pos);
}
}
if ((pos > 0 && random().nextInt(200) == 17)) {
verify(bytes, expected, pos);
}
}
BytesStore bytesToVerify;
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println("TEST: save/load final bytes");
}
Directory dir = newDirectory();
IndexOutput out = dir.createOutput("bytes", IOContext.DEFAULT);
bytes.writeTo(out);
out.close();
IndexInput in = dir.openInput("bytes", IOContext.DEFAULT);
bytesToVerify = new BytesStore(in, numBytes, _TestUtil.nextInt(random(), 256, Integer.MAX_VALUE));
in.close();
dir.close();
} else {
bytesToVerify = bytes;
}
verify(bytesToVerify, expected, numBytes);
}
}
private void verify(BytesStore bytes, byte[] expected, int totalLength) throws Exception {
assertEquals(totalLength, bytes.getPosition());
if (totalLength == 0) {
return;
}
if (VERBOSE) {
System.out.println(" verify...");
}
// First verify whole thing in one blast:
byte[] actual = new byte[totalLength];
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" bulk: reversed");
}
// reversed
FST.BytesReader r = bytes.getReverseReader();
assertTrue(r.reversed());
r.setPosition(totalLength-1);
r.readBytes(actual, 0, actual.length);
int start = 0;
int end = totalLength - 1;
while(start < end) {
byte b = actual[start];
actual[start] = actual[end];
actual[end] = b;
start++;
end--;
}
} else {
// forward
if (VERBOSE) {
System.out.println(" bulk: forward");
}
FST.BytesReader r = bytes.getForwardReader();
assertFalse(r.reversed());
r.readBytes(actual, 0, actual.length);
}
for(int i=0;i<totalLength;i++) {
assertEquals("byte @ index=" + i, expected[i], actual[i]);
}
FST.BytesReader r;
// Then verify ops:
boolean reversed = random().nextBoolean();
if (reversed) {
if (VERBOSE) {
System.out.println(" ops: reversed");
}
r = bytes.getReverseReader();
} else {
if (VERBOSE) {
System.out.println(" ops: forward");
}
r = bytes.getForwardReader();
}
if (totalLength > 1) {
int numOps = _TestUtil.nextInt(random(), 100, 200);
for(int op=0;op<numOps;op++) {
int numBytes = random().nextInt(Math.min(1000, totalLength-1));
int pos;
if (reversed) {
pos = _TestUtil.nextInt(random(), numBytes, totalLength-1);
} else {
pos = random().nextInt(totalLength-numBytes);
}
if (VERBOSE) {
System.out.println(" op iter=" + op + " reversed=" + reversed + " numBytes=" + numBytes + " pos=" + pos);
}
byte[] temp = new byte[numBytes];
r.setPosition(pos);
assertEquals(pos, r.getPosition());
r.readBytes(temp, 0, temp.length);
for(int i=0;i<numBytes;i++) {
byte expectedByte;
if (reversed) {
expectedByte = expected[pos - i];
} else {
expectedByte = expected[pos + i];
}
assertEquals("byte @ index=" + i, expectedByte, temp[i]);
}
int left;
int expectedPos;
if (reversed) {
expectedPos = pos-numBytes;
left = (int) r.getPosition();
} else {
expectedPos = pos+numBytes;
left = (int) (totalLength - r.getPosition());
}
assertEquals(expectedPos, r.getPosition());
if (left > 4) {
int skipBytes = random().nextInt(left-4);
int expectedInt = 0;
if (reversed) {
expectedPos -= skipBytes;
expectedInt |= (expected[expectedPos--]&0xFF)<<24;
expectedInt |= (expected[expectedPos--]&0xFF)<<16;
expectedInt |= (expected[expectedPos--]&0xFF)<<8;
expectedInt |= (expected[expectedPos--]&0xFF);
} else {
expectedPos += skipBytes;
expectedInt |= (expected[expectedPos++]&0xFF)<<24;
expectedInt |= (expected[expectedPos++]&0xFF)<<16;
expectedInt |= (expected[expectedPos++]&0xFF)<<8;
expectedInt |= (expected[expectedPos++]&0xFF);
}
if (VERBOSE) {
System.out.println(" skip numBytes=" + skipBytes);
System.out.println(" readInt");
}
r.skipBytes(skipBytes);
assertEquals(expectedInt, r.readInt());
}
}
}
}
}
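As a quick reference for the BytesStore API exercised above, a small hedged sketch follows; it assumes the constructor and read/write methods behave exactly as the test uses them.
// Sketch (assumed usage): write a few bytes into a paged store, then read them
// back in order through the forward reader.
BytesStore store = new BytesStore(8); // blockBits=8, i.e. 256-byte blocks
store.writeByte((byte) 1);
store.writeBytes(new byte[] {2, 3, 4}, 0, 3);
byte[] back = new byte[(int) store.getPosition()]; // 4 bytes written so far
FST.BytesReader forward = store.getForwardReader();
forward.readBytes(back, 0, back.length); // back == {1, 2, 3, 4}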
View File
@ -310,7 +310,7 @@ public class TestFSTs extends LuceneTestCase {
final boolean doRewrite = random().nextBoolean();
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doRewrite, true);
Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, doRewrite, PackedInts.DEFAULT, true, 15);
boolean storeOrd = random().nextBoolean();
if (VERBOSE) {
@ -453,7 +453,7 @@ public class TestFSTs extends LuceneTestCase {
this.outputs = outputs;
this.doPack = doPack;
builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null, doPack, !noArcArrays);
builder = new Builder<T>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, null, doPack, PackedInts.DEFAULT, !noArcArrays, 15);
}
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
@ -484,8 +484,13 @@ public class TestFSTs extends LuceneTestCase {
}
}
long tMid = System.currentTimeMillis();
System.out.println(((tMid-tStart) / 1000.0) + " sec to add all terms");
assert builder.getTermCount() == ord;
FST<T> fst = builder.finish();
long tEnd = System.currentTimeMillis();
System.out.println(((tEnd-tMid) / 1000.0) + " sec to finish/pack");
if (fst == null) {
System.out.println("FST was fully pruned!");
System.exit(0);
@ -513,6 +518,12 @@ public class TestFSTs extends LuceneTestCase {
return;
}
/*
IndexInput in = dir.openInput("fst.bin", IOContext.DEFAULT);
fst = new FST<T>(in, outputs);
in.close();
*/
System.out.println("\nNow verify...");
while(true) {
@ -576,7 +587,7 @@ public class TestFSTs extends LuceneTestCase {
}
}
// java -cp build/classes/test:build/classes/test-framework:build/classes/java:lib/junit-4.10.jar org.apache.lucene.util.fst.TestFSTs /x/tmp/allTerms3.txt out
// java -cp ../build/codecs/classes/java:../test-framework/lib/randomizedtesting-runner-2.0.8.jar:../build/core/classes/test:../build/core/classes/test-framework:../build/core/classes/java:../build/test-framework/classes/java:../test-framework/lib/junit-4.10.jar org.apache.lucene.util.fst.TestFSTs /xold/tmp/allTerms3.txt out
public static void main(String[] args) throws IOException {
int prune = 0;
int limit = Integer.MAX_VALUE;
@ -1022,7 +1033,7 @@ public class TestFSTs extends LuceneTestCase {
throws IOException {
if (FST.targetHasArcs(arc)) {
int childCount = 0;
FST.BytesReader fstReader = fst.getBytesReader(0);
BytesReader fstReader = fst.getBytesReader(0);
for (arc = fst.readFirstTargetArc(arc, arc, fstReader);;
arc = fst.readNextArc(arc, fstReader), childCount++)
{
@ -1062,7 +1073,7 @@ public class TestFSTs extends LuceneTestCase {
public void testFinalOutputOnEndState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean(), true);
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, null, random().nextBoolean(), PackedInts.DEFAULT, true, 15);
builder.add(Util.toUTF32("stat", new IntsRef()), 17L);
builder.add(Util.toUTF32("station", new IntsRef()), 10L);
final FST<Long> fst = builder.finish();
@ -1077,7 +1088,7 @@ public class TestFSTs extends LuceneTestCase {
public void testInternalFinalState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton(true);
final boolean willRewrite = random().nextBoolean();
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite, true);
final Builder<Long> builder = new Builder<Long>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, willRewrite, PackedInts.DEFAULT, true, 15);
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRef()), outputs.getNoOutput());
builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRef()), outputs.getNoOutput());
final FST<Long> fst = builder.finish();
@ -1100,7 +1111,7 @@ public class TestFSTs extends LuceneTestCase {
final Long nothing = outputs.getNoOutput();
final Builder<Long> b = new Builder<Long>(FST.INPUT_TYPE.BYTE1, outputs);
final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT, true);
final FST<Long> fst = new FST<Long>(FST.INPUT_TYPE.BYTE1, outputs, false, PackedInts.COMPACT, true, 15);
final Builder.UnCompiledNode<Long> rootNode = new Builder.UnCompiledNode<Long>(b, 0);
View File
@ -46,7 +46,7 @@ public class SearchFiles {
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
String usage =
"Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/java/4_0/demo.html for details.";
"Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
System.out.println(usage);
System.exit(0);
View File
@ -3,7 +3,7 @@ package org.apache.lucene.facet.associations;
import java.io.IOException;
import org.apache.lucene.facet.search.PayloadIterator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
@ -46,12 +46,21 @@ public abstract class AssociationsPayloadIterator<T extends CategoryAssociation>
* It is assumed that all association values can be deserialized with the
* given {@link CategoryAssociation}.
*/
public AssociationsPayloadIterator(IndexReader reader, String field, T association) throws IOException {
pi = new PayloadIterator(reader, new Term(field, association.getCategoryListID()));
hasAssociations = pi.init();
public AssociationsPayloadIterator(String field, T association) throws IOException {
pi = new PayloadIterator(new Term(field, association.getCategoryListID()));
this.association = association;
}
/**
* Sets the {@link AtomicReaderContext} for which {@link #setNextDoc(int)}
* calls will be made. Returns true iff this reader has associations for any
* of the documents belonging to the association given to the constructor.
*/
public final boolean setNextReader(AtomicReaderContext context) throws IOException {
hasAssociations = pi.setNextReader(context);
return hasAssociations;
}
/**
* Skip to the requested document. Returns true iff the document has category
* association values and they were read successfully. Associations are
View File
@ -2,7 +2,6 @@ package org.apache.lucene.facet.associations;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.collections.IntToFloatMap;
/*
@ -31,9 +30,8 @@ public class FloatAssociationsPayloadIterator extends AssociationsPayloadIterato
private final IntToFloatMap ordinalAssociations = new IntToFloatMap();
public FloatAssociationsPayloadIterator(IndexReader reader, String field, CategoryFloatAssociation association)
throws IOException {
super(reader, field, association);
public FloatAssociationsPayloadIterator(String field, CategoryFloatAssociation association) throws IOException {
super(field, association);
}
@Override
View File
@ -2,7 +2,6 @@ package org.apache.lucene.facet.associations;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.collections.IntToIntMap;
/*
@ -31,9 +30,8 @@ public class IntAssociationsPayloadIterator extends AssociationsPayloadIterator<
private final IntToIntMap ordinalAssociations = new IntToIntMap();
public IntAssociationsPayloadIterator(IndexReader reader, String field, CategoryIntAssociation association)
throws IOException {
super(reader, field, association);
public IntAssociationsPayloadIterator(String field, CategoryIntAssociation association) throws IOException {
super(field, association);
}
@Override
View File
@ -3,13 +3,10 @@ package org.apache.lucene.facet.index.params;
import java.io.IOException;
import java.io.Serializable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.PayloadCategoryListIteraor;
import org.apache.lucene.facet.search.TotalFacetCounts;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.encoding.DGapIntEncoder;
import org.apache.lucene.util.encoding.IntDecoder;
import org.apache.lucene.util.encoding.IntEncoder;
@ -98,11 +95,6 @@ public class CategoryListParams implements Serializable {
return new SortingIntEncoder(new UniqueValuesIntEncoder(new DGapIntEncoder(new VInt8IntEncoder())));
}
/**
* Equality is defined by the 'term' that defines this category list.
* Sub-classes should override this method if a more complex calculation
* is needed to ensure equality.
*/
@Override
public boolean equals(Object o) {
if (o == this) {
@ -121,29 +113,16 @@ public class CategoryListParams implements Serializable {
return this.term.equals(other.term);
}
/**
* Hashcode is similar to {@link #equals(Object)}, in that it uses
* the term that defines this category list to derive the hashcode.
* Subclasses need to ensure that equality/hashcode is correctly defined,
* or there could be side-effects in the {@link TotalFacetCounts} caching
* mechanism (as the filename for a Total Facet Counts array cache
* is dependent on the hashCode, so it should consistently return the same
* hash for identity).
*/
@Override
public int hashCode() {
return this.hashCode;
}
/**
* Create the category list iterator for the specified partition.
*/
public CategoryListIterator createCategoryListIterator(IndexReader reader,
int partition) throws IOException {
/** Create the {@link CategoryListIterator} for the specified partition. */
public CategoryListIterator createCategoryListIterator(int partition) throws IOException {
String categoryListTermStr = PartitionsUtils.partitionName(this, partition);
Term payloadTerm = new Term(term.field(), categoryListTermStr);
return new PayloadCategoryListIteraor(reader, payloadTerm,
createEncoder().createMatchingDecoder());
return new PayloadCategoryListIteraor(payloadTerm, createEncoder().createMatchingDecoder());
}
}
View File
@ -2,6 +2,7 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@ -23,6 +24,8 @@ import org.apache.lucene.util.IntsRef;
/**
* An interface for obtaining the category ordinals of documents.
* {@link #getOrdinals(int, IntsRef)} calls are done with document IDs that are
* local to the reader given to {@link #setNextReader(AtomicReaderContext)}.
* <p>
* <b>NOTE:</b> this class operates as a key to a map, and therefore you should
* implement {@code equals()} and {@code hashCode()} for proper behavior.
@ -32,11 +35,12 @@ import org.apache.lucene.util.IntsRef;
public interface CategoryListIterator {
/**
* Initializes the iterator. This method must be called before any calls to
* {@link #getOrdinals(int, IntsRef)}, and its return value indicates whether there are
* any relevant documents for this iterator.
* Sets the {@link AtomicReaderContext} for which
* {@link #getOrdinals(int, IntsRef)} calls will be made. Returns true iff any
* of the documents in this reader have category ordinals. This method must be
* called before any calls to {@link #getOrdinals(int, IntsRef)}.
*/
public boolean init() throws IOException;
public boolean setNextReader(AtomicReaderContext context) throws IOException;
/**
* Stores the category ordinals of the given document ID in the given
@ -44,7 +48,7 @@ public interface CategoryListIterator {
* the {@link IntsRef} if it is not large enough.
*
* <p>
* <b>NOTE:</b> if the requested document does not category ordinals
* <b>NOTE:</b> if the requested document does not have category ordinals
* associated with it, {@link IntsRef#length} is set to zero.
*/
public void getOrdinals(int docID, IntsRef ints) throws IOException;
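Because the contract changes from a single init() call to per-segment setNextReader() calls, a short usage sketch may help; the caller loop and the cli/indexReader variables are assumptions, while the calls mirror how this change drives the iterator elsewhere.
// Sketch of the per-segment calling pattern (caller code is assumed):
IntsRef ordinals = new IntsRef(32);
for (AtomicReaderContext ctx : indexReader.leaves()) {
  if (!cli.setNextReader(ctx)) {
    continue; // no category ordinals in this segment
  }
  int maxDoc = ctx.reader().maxDoc();
  for (int doc = 0; doc < maxDoc; doc++) {
    cli.getOrdinals(doc, ordinals); // doc is local to ctx
    if (ordinals.length == 0) {
      continue; // document has no category ordinals
    }
    // consume ordinals.ints[0 .. ordinals.length)
  }
}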
View File
@ -2,7 +2,7 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
@ -34,17 +34,15 @@ import org.apache.lucene.util.encoding.IntDecoder;
public class PayloadCategoryListIteraor implements CategoryListIterator {
private final IntDecoder decoder;
private final IndexReader indexReader;
private final Term term;
private final PayloadIterator pi;
private final int hashCode;
public PayloadCategoryListIteraor(IndexReader indexReader, Term term, IntDecoder decoder) throws IOException {
pi = new PayloadIterator(indexReader, term);
public PayloadCategoryListIteraor(Term term, IntDecoder decoder) throws IOException {
pi = new PayloadIterator(term);
this.decoder = decoder;
hashCode = indexReader.hashCode() ^ term.hashCode();
hashCode = term.hashCode();
this.term = term;
this.indexReader = indexReader;
}
@Override
@ -58,7 +56,7 @@ public class PayloadCategoryListIteraor implements CategoryListIterator {
}
// Hash codes are the same, check equals() to avoid cases of hash-collisions.
return indexReader.equals(that.indexReader) && term.equals(that.term);
return term.equals(that.term);
}
@Override
@ -67,8 +65,8 @@ public class PayloadCategoryListIteraor implements CategoryListIterator {
}
@Override
public boolean init() throws IOException {
return pi.init();
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return pi.setNextReader(context);
}
@Override
View File
@ -1,12 +1,10 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@ -42,27 +40,25 @@ import org.apache.lucene.util.BytesRef;
*/
public class PayloadIterator {
protected BytesRef data;
private TermsEnum reuseTE;
private DocsAndPositionsEnum currentDPE;
private DocsAndPositionsEnum dpe;
private boolean hasMore;
private int curDocID, curDocBase;
private int curDocID;
private final Iterator<AtomicReaderContext> leaves;
private final Term term;
public PayloadIterator(IndexReader indexReader, Term term) throws IOException {
leaves = indexReader.leaves().iterator();
public PayloadIterator(Term term) throws IOException {
this.term = term;
}
private void nextSegment() throws IOException {
/**
* Sets the {@link AtomicReaderContext} for which {@link #getPayload(int)}
* calls will be made. Returns true iff this reader has payload for any of the
* documents belonging to the {@link Term} given to the constructor.
*/
public boolean setNextReader(AtomicReaderContext context) throws IOException {
hasMore = false;
while (leaves.hasNext()) {
AtomicReaderContext ctx = leaves.next();
curDocBase = ctx.docBase;
Fields fields = ctx.reader().fields();
Fields fields = context.reader().fields();
if (fields != null) {
Terms terms = fields.terms(term.field());
if (terms != null) {
@ -71,70 +67,48 @@ public class PayloadIterator {
// this class is usually used to iterate on whatever a Query matched
// if it didn't match deleted documents, we won't receive them. if it
// did, we should iterate on them too, therefore we pass liveDocs=null
currentDPE = reuseTE.docsAndPositions(null, currentDPE, DocsAndPositionsEnum.FLAG_PAYLOADS);
if (currentDPE != null && (curDocID = currentDPE.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
dpe = reuseTE.docsAndPositions(null, dpe, DocsAndPositionsEnum.FLAG_PAYLOADS);
if (dpe != null && (curDocID = dpe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
hasMore = true;
break;
}
}
}
}
}
}
/**
* Initialize the iterator. Should be done before the first call to
* {@link #getPayload(int)}. Returns {@code false} if no category list is
* found, or the category list has no documents.
*/
public boolean init() throws IOException {
nextSegment();
return hasMore;
}
/**
* Returns the {@link BytesRef payload} of the given document, or {@code null}
* if the document does not exist, there are no more documents in the posting
* list, or the document exists but has not payload. You should call
* {@link #init()} before the first call to this method.
* list, or the document exists but has no payload. The given document IDs
* are treated as local to the reader given to
* {@link #setNextReader(AtomicReaderContext)}.
*/
public BytesRef getPayload(int docID) throws IOException {
if (!hasMore) {
return null;
}
// re-basing docId->localDocID is done fewer times than currentDoc->globalDoc
int localDocID = docID - curDocBase;
if (curDocID > localDocID) {
if (curDocID > docID) {
// document does not exist
return null;
}
if (curDocID < localDocID) {
// look for the document either in that segment, or others
while (hasMore && (curDocID = currentDPE.advance(localDocID)) == DocIdSetIterator.NO_MORE_DOCS) {
nextSegment(); // also updates curDocID
localDocID = docID - curDocBase;
// nextSegment advances to nextDoc, so check if we still need to advance
if (curDocID >= localDocID) {
break;
if (curDocID < docID) {
curDocID = dpe.advance(docID);
if (curDocID != docID) { // requested document does not have a payload
if (curDocID == DocIdSetIterator.NO_MORE_DOCS) { // no more docs in this reader
hasMore = false;
}
}
// we break from the above loop when:
// 1. we iterated over all segments (hasMore=false)
// 2. current segment advanced to a doc, either requested or higher
if (!hasMore || curDocID != localDocID) {
return null;
}
}
// we're on the document
assert currentDPE.freq() == 1 : "expecting freq=1 (got " + currentDPE.freq() + ") term=" + term + " doc=" + (curDocID + curDocBase);
int pos = currentDPE.nextPosition();
assert pos != -1 : "no positions for term=" + term + " doc=" + (curDocID + curDocBase);
return currentDPE.getPayload();
assert dpe.freq() == 1 : "expecting freq=1 (got " + dpe.freq() + ") term=" + term + " doc=" + curDocID;
int pos = dpe.nextPosition();
assert pos != -1 : "no positions for term=" + term + " doc=" + curDocID;
return dpe.getPayload();
}
}
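The same per-segment pattern applies to PayloadIterator; here is a hedged usage sketch in which the term, field name and caller loop are illustrative assumptions.
// Sketch (assumed caller code): setNextReader() per segment, then getPayload()
// with segment-local document IDs.
PayloadIterator pi = new PayloadIterator(new Term("$facets", "categories")); // hypothetical term
for (AtomicReaderContext ctx : reader.leaves()) {
  if (!pi.setNextReader(ctx)) {
    continue; // no payloads for this term in this segment
  }
  int maxDoc = ctx.reader().maxDoc();
  for (int doc = 0; doc < maxDoc; doc++) {
    BytesRef payload = pi.getPayload(doc); // doc is segment-local
    if (payload != null) {
      // decode category data from payload.bytes[payload.offset .. payload.offset + payload.length)
    }
  }
}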
View File
@ -62,7 +62,7 @@ public abstract class ScoredDocIdCollector extends Collector {
}
@Override
public ScoredDocIDsIterator scoredDocIdsIterator() {
protected ScoredDocIDsIterator scoredDocIdsIterator() {
return new ScoredDocIDsIterator() {
private DocIdSetIterator docIdsIter = docIds.iterator();
@ -129,7 +129,7 @@ public abstract class ScoredDocIdCollector extends Collector {
}
@Override
public ScoredDocIDsIterator scoredDocIdsIterator() {
protected ScoredDocIDsIterator scoredDocIdsIterator() {
return new ScoredDocIDsIterator() {
private DocIdSetIterator docIdsIter = docIds.iterator();
@ -189,8 +189,7 @@ public abstract class ScoredDocIdCollector extends Collector {
* do not require scoring, it is better to set it to <i>false</i>.
*/
public static ScoredDocIdCollector create(int maxDoc, boolean enableScoring) {
return enableScoring ? new ScoringDocIdCollector(maxDoc)
: new NonScoringDocIdCollector(maxDoc);
return enableScoring ? new ScoringDocIdCollector(maxDoc) : new NonScoringDocIdCollector(maxDoc);
}
private ScoredDocIdCollector(int maxDoc) {
@ -198,13 +197,14 @@ public abstract class ScoredDocIdCollector extends Collector {
docIds = new FixedBitSet(maxDoc);
}
protected abstract ScoredDocIDsIterator scoredDocIdsIterator() throws IOException;
/** Returns the default score used when scoring is disabled. */
public abstract float getDefaultScore();
/** Set the default score. Only applicable if scoring is disabled. */
public abstract void setDefaultScore(float defaultScore);
public abstract ScoredDocIDsIterator scoredDocIdsIterator() throws IOException;
public ScoredDocIDs getScoredDocIDs() {
return new ScoredDocIDs() {
View File
@ -4,22 +4,23 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.facet.util.ScoredDocIdsUtils;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -213,18 +214,15 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
/** Check if it is worth to use complements */
protected boolean shouldComplement(ScoredDocIDs docids) {
return
mayComplement() &&
(docids.size() > indexReader.numDocs() * getComplementThreshold()) ;
return mayComplement() && (docids.size() > indexReader.numDocs() * getComplementThreshold()) ;
}
/**
* Iterate over the documents for this partition and fill the facet arrays with the correct
* count/complement count/value.
* @throws IOException If there is a low-level I/O error.
*/
private final void fillArraysForPartition(ScoredDocIDs docids,
FacetArrays facetArrays, int partition) throws IOException {
private final void fillArraysForPartition(ScoredDocIDs docids, FacetArrays facetArrays, int partition)
throws IOException {
if (isUsingComplements) {
initArraysByTotalCounts(facetArrays, partition, docids.size());
@ -236,27 +234,41 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
IntsRef ordinals = new IntsRef(32); // a reasonable start capacity for most common apps
for (Entry<CategoryListIterator, Aggregator> entry : categoryLists.entrySet()) {
CategoryListIterator categoryList = entry.getKey();
if (!categoryList.init()) {
continue;
}
Aggregator categorator = entry.getValue();
ScoredDocIDsIterator iterator = docids.iterator();
final ScoredDocIDsIterator iterator = docids.iterator();
final CategoryListIterator categoryListIter = entry.getKey();
final Aggregator aggregator = entry.getValue();
Iterator<AtomicReaderContext> contexts = indexReader.leaves().iterator();
AtomicReaderContext current = null;
int maxDoc = -1;
while (iterator.next()) {
int docID = iterator.getDocID();
categoryList.getOrdinals(docID, ordinals);
if (ordinals.length == 0) {
continue;
while (docID >= maxDoc) { // find the segment which contains this document
if (!contexts.hasNext()) {
throw new RuntimeException("ScoredDocIDs contains documents outside this reader's segments !?");
}
categorator.aggregate(docID, iterator.getScore(), ordinals);
current = contexts.next();
maxDoc = current.docBase + current.reader().maxDoc();
if (docID < maxDoc) { // segment has docs, check if it has categories
boolean validSegment = categoryListIter.setNextReader(current);
validSegment &= aggregator.setNextReader(current);
if (!validSegment) { // if categoryList or aggregator says it's an invalid segment, skip all docs
while (docID < maxDoc && iterator.next()) {
docID = iterator.getDocID();
}
}
}
}
docID -= current.docBase;
categoryListIter.getOrdinals(docID, ordinals);
if (ordinals.length == 0) {
continue; // document does not have category ordinals
}
aggregator.aggregate(docID, iterator.getScore(), ordinals);
}
}
}
/**
* Init arrays for partition by total counts, optionally applying a factor
*/
/** Init arrays for partition by total counts, optionally applying a factor */
private final void initArraysByTotalCounts(FacetArrays facetArrays, int partition, int nAccumulatedDocs) {
int[] intArray = facetArrays.getIntArray();
totalFacetCounts.fillTotalCountsForPartition(intArray, partition);
@ -302,10 +314,9 @@ public class StandardFacetsAccumulator extends FacetsAccumulator {
for (FacetRequest facetRequest : searchParams.getFacetRequests()) {
Aggregator categoryAggregator = facetRequest.createAggregator(
isUsingComplements, facetArrays, indexReader, taxonomyReader);
isUsingComplements, facetArrays, taxonomyReader);
CategoryListIterator cli =
facetRequest.createCategoryListIterator(indexReader, taxonomyReader, searchParams, partition);
CategoryListIterator cli = facetRequest.createCategoryListIterator(taxonomyReader, searchParams, partition);
// get the aggregator
Aggregator old = categoryLists.put(cli, categoryAggregator);
View File
@ -170,7 +170,7 @@ public class TotalFacetCounts {
Aggregator aggregator = new CountingAggregator(counts[partition]);
HashMap<CategoryListIterator, Aggregator> map = new HashMap<CategoryListIterator, Aggregator>();
for (CategoryListParams clp: facetIndexingParams.getAllCategoryListParams()) {
final CategoryListIterator cli = clIteraor(clCache, clp, indexReader, partition);
final CategoryListIterator cli = clIteraor(clCache, clp, partition);
map.put(cli, aggregator);
}
return map;
@ -181,14 +181,14 @@ public class TotalFacetCounts {
return new TotalFacetCounts(taxonomy, facetIndexingParams, counts, CreationType.Computed);
}
static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp,
IndexReader indexReader, int partition) throws IOException {
static CategoryListIterator clIteraor(CategoryListCache clCache, CategoryListParams clp, int partition)
throws IOException {
if (clCache != null) {
CategoryListData cld = clCache.get(clp);
if (cld != null) {
return cld.iterator(partition);
}
}
return clp.createCategoryListIterator(indexReader, partition);
return clp.createCategoryListIterator(partition);
}
}
View File
@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.aggregator;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@ -22,21 +23,22 @@ import org.apache.lucene.util.IntsRef;
*/
/**
* An Aggregator is the analogue of Lucene's Collector (see
* {@link org.apache.lucene.search.Collector}), for processing the categories
* belonging to a certain document. The Aggregator is responsible for doing
* whatever it wishes with the categories it is fed, e.g., counting the number
* of times that each category appears, or performing some computation on their
* association values.
* <P>
* Much of the function of an Aggregator implementation is not described by this
* interface. This includes the constructor and getter methods to retrieve the
* results of the aggregation.
* Aggregates the categories of documents given to
* {@link #aggregate(int, float, IntsRef)}. Note that the document IDs are local
* to the reader given to {@link #setNextReader(AtomicReaderContext)}.
*
* @lucene.experimental
*/
public interface Aggregator {
/**
* Sets the {@link AtomicReaderContext} for which
* {@link #aggregate(int, float, IntsRef)} calls will be made. If this method
* returns false, {@link #aggregate(int, float, IntsRef)} should not be called
* for this reader.
*/
public boolean setNextReader(AtomicReaderContext context) throws IOException;
/**
* Aggregate the ordinals of the given document ID (and its score). The given
* ordinals offset is always zero.
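To make the reader-local contract concrete, a minimal illustrative Aggregator follows; it is a sketch under the assumption that aggregate(int, float, IntsRef) is the only other method to implement, and it is not the CountingAggregator shipped with this change.
// Sketch (illustrative only): counts how often each category ordinal is seen.
class OrdinalCountingSketchAggregator implements Aggregator {
  private final int[] counts;
  OrdinalCountingSketchAggregator(int[] counts) { this.counts = counts; }
  @Override
  public boolean setNextReader(AtomicReaderContext context) throws IOException {
    return true; // plain counting keeps no per-segment state
  }
  @Override
  public void aggregate(int docID, float score, IntsRef ordinals) {
    for (int i = 0; i < ordinals.length; i++) {
      counts[ordinals.ints[i]]++;
    }
  }
}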
View File
@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.aggregator;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@ -57,4 +58,9 @@ public class CountingAggregator implements Aggregator {
return counterArray == null ? 0 : counterArray.hashCode();
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return true;
}
}
View File
@ -2,6 +2,7 @@ package org.apache.lucene.facet.search.aggregator;
import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@ -58,4 +59,9 @@ public class ScoringAggregator implements Aggregator {
return hashCode;
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return true;
}
}
View File
@ -6,7 +6,7 @@ import org.apache.lucene.facet.associations.CategoryFloatAssociation;
import org.apache.lucene.facet.associations.FloatAssociationsPayloadIterator;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.collections.IntToFloatMap;
@ -39,13 +39,13 @@ public class AssociationFloatSumAggregator implements Aggregator {
protected final float[] sumArray;
protected final FloatAssociationsPayloadIterator associations;
public AssociationFloatSumAggregator(IndexReader reader, float[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), reader, sumArray);
public AssociationFloatSumAggregator(float[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), sumArray);
}
public AssociationFloatSumAggregator(String field, IndexReader reader, float[] sumArray) throws IOException {
public AssociationFloatSumAggregator(String field, float[] sumArray) throws IOException {
this.field = field;
associations = new FloatAssociationsPayloadIterator(reader, field, new CategoryFloatAssociation());
associations = new FloatAssociationsPayloadIterator(field, new CategoryFloatAssociation());
this.sumArray = sumArray;
}
@ -76,4 +76,9 @@ public class AssociationFloatSumAggregator implements Aggregator {
return field.hashCode();
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return associations.setNextReader(context);
}
}
View File
@ -6,7 +6,7 @@ import org.apache.lucene.facet.associations.CategoryIntAssociation;
import org.apache.lucene.facet.associations.IntAssociationsPayloadIterator;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.collections.IntToIntMap;
@ -39,13 +39,13 @@ public class AssociationIntSumAggregator implements Aggregator {
protected final int[] sumArray;
protected final IntAssociationsPayloadIterator associations;
public AssociationIntSumAggregator(IndexReader reader, int[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), reader, sumArray);
public AssociationIntSumAggregator(int[] sumArray) throws IOException {
this(CategoryListParams.DEFAULT_TERM.field(), sumArray);
}
public AssociationIntSumAggregator(String field, IndexReader reader, int[] sumArray) throws IOException {
public AssociationIntSumAggregator(String field, int[] sumArray) throws IOException {
this.field = field;
associations = new IntAssociationsPayloadIterator(reader, field, new CategoryIntAssociation());
associations = new IntAssociationsPayloadIterator(field, new CategoryIntAssociation());
this.sumArray = sumArray;
}
@ -76,4 +76,9 @@ public class AssociationIntSumAggregator implements Aggregator {
return field.hashCode();
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return associations.setNextReader(context);
}
}
View File
@ -6,6 +6,7 @@ import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;
@ -56,25 +57,30 @@ public class CategoryListData {
}
/** Compute category list data for caching for faster iteration. */
CategoryListData(IndexReader reader, TaxonomyReader taxo,
FacetIndexingParams iparams, CategoryListParams clp) throws IOException {
CategoryListData(IndexReader reader, TaxonomyReader taxo, FacetIndexingParams iparams, CategoryListParams clp)
throws IOException {
final int maxDoc = reader.maxDoc();
int[][][]dpf = new int[maxDoc][][];
int[][][]dpf = new int[reader.maxDoc()][][];
int numPartitions = (int)Math.ceil(taxo.getSize()/(double)iparams.getPartitionSize());
IntsRef ordinals = new IntsRef(32);
for (int part = 0; part < numPartitions; part++) {
CategoryListIterator cli = clp.createCategoryListIterator(reader, part);
if (cli.init()) {
for (int doc = 0; doc < maxDoc; doc++) {
cli.getOrdinals(doc, ordinals);
for (AtomicReaderContext context : reader.leaves()) {
CategoryListIterator cli = clp.createCategoryListIterator(part);
if (cli.setNextReader(context)) {
final int maxDoc = context.reader().maxDoc();
for (int i = 0; i < maxDoc; i++) {
cli.getOrdinals(i, ordinals);
if (ordinals.length > 0) {
int doc = i + context.docBase;
if (dpf[doc] == null) {
dpf[doc] = new int[numPartitions][];
}
if (dpf[doc][part] == null) {
dpf[doc][part] = new int[ordinals.length];
for (int i = 0; i < ordinals.length; i++) {
dpf[doc][part][i] = ordinals.ints[i];
}
for (int j = 0; j < ordinals.length; j++) {
dpf[doc][part][j] = ordinals.ints[j];
}
}
}
}
@ -93,6 +99,7 @@ public class CategoryListData {
/** Internal: category list iterator over uncompressed category info in RAM */
private static class RAMCategoryListIterator implements CategoryListIterator {
private int docBase;
private final int part;
private final int[][][] dpc;
@ -102,13 +109,15 @@ public class CategoryListData {
}
@Override
public boolean init() throws IOException {
public boolean setNextReader(AtomicReaderContext context) throws IOException {
docBase = context.docBase;
return dpc != null && dpc.length > part;
}
@Override
public void getOrdinals(int docID, IntsRef ints) throws IOException {
ints.length = 0;
docID += docBase;
if (dpc.length > docID && dpc[docID] != null && dpc[docID][part] != null) {
if (ints.ints.length < dpc[docID][part].length) {
ints.grow(dpc[docID][part].length);
View File
@ -1,7 +1,5 @@
package org.apache.lucene.facet.search.params;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.ComplementCountingAggregator;
@ -47,8 +45,7 @@ public class CountFacetRequest extends FacetRequest {
}
@Override
public Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader reader, TaxonomyReader taxonomy) {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) {
// we rely on that, if needed, result is cleared by arrays!
int[] a = arrays.getIntArray();
if (useComplements) {
View File
@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.params;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.FacetArrays;
@ -11,8 +9,8 @@ import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.TopKFacetResultsHandler;
import org.apache.lucene.facet.search.TopKInEachNodeHandler;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.cache.CategoryListData;
import org.apache.lucene.facet.search.cache.CategoryListCache;
import org.apache.lucene.facet.search.cache.CategoryListData;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
@ -314,23 +312,19 @@ public abstract class FacetRequest implements Cloneable {
* computation.
* @param arrays
* provider for facet arrays in use for current computation.
* @param indexReader
* index reader in effect.
* @param taxonomy
* reader of taxonomy in effect.
* @throws IOException If there is a low-level I/O error.
*/
public abstract Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader indexReader,
TaxonomyReader taxonomy) throws IOException;
public abstract Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
throws IOException;
/**
* Create the category list iterator for the specified partition.
* If a non null cache is provided which contains the required data,
* use it for the iteration.
* Create the category list iterator for the specified partition. If a non
* null cache is provided which contains the required data, use it for the
* iteration.
*/
public CategoryListIterator createCategoryListIterator(IndexReader reader,
TaxonomyReader taxo, FacetSearchParams sParams, int partition)
public CategoryListIterator createCategoryListIterator(TaxonomyReader taxo, FacetSearchParams sParams, int partition)
throws IOException {
CategoryListCache clCache = sParams.getCategoryListCache();
CategoryListParams clParams = sParams.getFacetIndexingParams().getCategoryListParams(categoryPath);
@ -340,7 +334,7 @@ public abstract class FacetRequest implements Cloneable {
return clData.iterator(partition);
}
}
return clParams.createCategoryListIterator(reader, partition);
return clParams.createCategoryListIterator(partition);
}
/**
View File
@ -1,7 +1,5 @@
package org.apache.lucene.facet.search.params;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.ScoringAggregator;
@ -38,9 +36,7 @@ public class ScoreFacetRequest extends FacetRequest {
}
@Override
public Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader reader,
TaxonomyReader taxonomy) {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) {
assert !useComplements : "complements are not supported by this FacetRequest";
return new ScoringAggregator(arrays.getFloatArray());
}
View File
@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.params.associations;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.associations.AssociationFloatSumAggregator;
@ -45,10 +43,10 @@ public class AssociationFloatSumFacetRequest extends FacetRequest {
}
@Override
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, IndexReader reader,
TaxonomyReader taxonomy) throws IOException {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
throws IOException {
assert !useComplements : "complements are not supported by this FacetRequest";
return new AssociationFloatSumAggregator(reader, arrays.getFloatArray());
return new AssociationFloatSumAggregator(arrays.getFloatArray());
}
@Override
View File
@ -2,8 +2,6 @@ package org.apache.lucene.facet.search.params.associations;
import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.aggregator.Aggregator;
import org.apache.lucene.facet.search.aggregator.associations.AssociationIntSumAggregator;
@ -45,10 +43,10 @@ public class AssociationIntSumFacetRequest extends FacetRequest {
}
@Override
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, IndexReader reader,
TaxonomyReader taxonomy) throws IOException {
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
throws IOException {
assert !useComplements : "complements are not supported by this FacetRequest";
return new AssociationIntSumAggregator(reader, arrays.getIntArray());
return new AssociationIntSumAggregator(arrays.getIntArray());
}
@Override
View File
@ -60,6 +60,7 @@ public abstract class Sampler {
/**
* Construct with certain {@link SamplingParams}
*
* @param params sampling params in effect
* @throws IllegalArgumentException if the provided SamplingParams are not valid
*/
@ -110,16 +111,15 @@ public abstract class Sampler {
* @param sampleSetSize required size of sample set
* @return sample of the input set in the required size
*/
protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize,
int sampleSetSize) throws IOException;
protected abstract SampleResult createSample(ScoredDocIDs docids, int actualSize, int sampleSetSize)
throws IOException;
/**
* Get a fixer of sample facet accumulation results. Default implementation
* returns a <code>TakmiSampleFixer</code> which is adequate only for
* counting. For any other accumulator, provide a different fixer.
*/
public SampleFixer getSampleFixer(
IndexReader indexReader, TaxonomyReader taxonomyReader,
public SampleFixer getSampleFixer(IndexReader indexReader, TaxonomyReader taxonomyReader,
FacetSearchParams searchParams) {
return new TakmiSampleFixer(indexReader, taxonomyReader, searchParams);
}
@ -215,19 +215,15 @@ public abstract class Sampler {
}
@Override
public CategoryListIterator createCategoryListIterator(IndexReader reader,
TaxonomyReader taxo, FacetSearchParams sParams, int partition)
throws IOException {
return orig.createCategoryListIterator(reader, taxo, sParams, partition);
public CategoryListIterator createCategoryListIterator(TaxonomyReader taxo, FacetSearchParams sParams,
int partition) throws IOException {
return orig.createCategoryListIterator(taxo, sParams, partition);
}
@Override
public Aggregator createAggregator(boolean useComplements,
FacetArrays arrays, IndexReader indexReader,
TaxonomyReader taxonomy) throws IOException {
return orig.createAggregator(useComplements, arrays, indexReader,
taxonomy);
public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy)
throws IOException {
return orig.createAggregator(useComplements, arrays, taxonomy);
}
@Override
@ -245,4 +241,5 @@ public abstract class Sampler {
return orig.supportsComplements();
}
}
}

View File

@ -91,8 +91,7 @@ class TakmiSampleFixer implements SampleFixer {
* full set of matching documents.
* @throws IOException If there is a low-level I/O error.
*/
private void recount(FacetResultNode fresNode, ScoredDocIDs docIds)
throws IOException {
private void recount(FacetResultNode fresNode, ScoredDocIDs docIds) throws IOException {
// TODO (Facet): change from void to return the new, smaller docSet, and use
// that for the children, as this will make their intersection ops faster.
// can do this only when the new set is "sufficiently" smaller.
@ -109,8 +108,7 @@ class TakmiSampleFixer implements SampleFixer {
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
int updatedCount = countIntersection(MultiFields.getTermDocsEnum(indexReader, liveDocs,
drillDownTerm.field(), drillDownTerm.bytes(),
0),
docIds.iterator());
0), docIds.iterator());
fresNode.setValue(updatedCount);
}

View File

@ -5,6 +5,7 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
@ -42,9 +43,10 @@ public class MultiCategoryListIterator implements CategoryListIterator {
}
@Override
public boolean init() throws IOException {
public boolean setNextReader(AtomicReaderContext context) throws IOException {
validIterators.clear();
for (CategoryListIterator cli : iterators) {
if (cli.init()) {
if (cli.setNextReader(context)) {
validIterators.add(cli);
}
}
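
The hunk above changes CategoryListIterator from a one-shot init() to a per-segment setNextReader(AtomicReaderContext) contract, with getOrdinals() taking segment-local document IDs. The following sketch is not part of the patch (the helper class name is made up); it only illustrates how a caller drives the new contract, mirroring the test changes later in this commit:

import java.io.IOException;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.IntsRef;

// Hypothetical helper, not in the patch: walks every segment of a reader and
// counts the category ordinals an iterator yields, using the new contract.
class CategoryOrdinalCounter {
  static int countOrdinals(IndexReader reader, CategoryListIterator cli) throws IOException {
    int totalOrdinals = 0;
    IntsRef ordinals = new IntsRef();
    for (AtomicReaderContext context : reader.leaves()) {
      if (!cli.setNextReader(context)) {
        continue; // this segment has no category data for the iterator
      }
      int maxDoc = context.reader().maxDoc();
      for (int doc = 0; doc < maxDoc; doc++) {  // doc is segment-local
        cli.getOrdinals(doc, ordinals);         // global id would be doc + context.docBase
        totalOrdinals += ordinals.length;
      }
    }
    return totalOrdinals;
  }
}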

View File

@ -3,17 +3,18 @@ package org.apache.lucene.facet.util;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -53,41 +54,50 @@ public class ScoredDocIdsUtils {
final int maxDoc = reader.maxDoc();
DocIdSet docIdSet = docids.getDocIDs();
final OpenBitSet complement;
if (docIdSet instanceof OpenBitSet) {
final FixedBitSet complement;
if (docIdSet instanceof FixedBitSet) {
// That is the most common case, if ScoredDocIdsCollector was used.
complement = ((OpenBitSet) docIdSet).clone();
complement = ((FixedBitSet) docIdSet).clone();
} else {
complement = new OpenBitSetDISI(docIdSet.iterator(), maxDoc);
complement = new FixedBitSet(maxDoc);
DocIdSetIterator iter = docIdSet.iterator();
int doc;
while ((doc = iter.nextDoc()) < maxDoc) {
complement.set(doc);
}
}
complement.flip(0, maxDoc);
// Remove all Deletions from the complement set
clearDeleted(reader, complement);
return createScoredDocIds(complement, maxDoc);
}
/**
* Clear all deleted documents from a given open-bit-set according to a given reader
*/
private static void clearDeleted(final IndexReader reader,
final OpenBitSet set) throws IOException {
/** Clear all deleted documents from a given open-bit-set according to a given reader */
private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException {
// If there are no deleted docs
if (!reader.hasDeletions()) {
return; // return immediately
}
Bits bits = MultiFields.getLiveDocs(reader);
DocIdSetIterator it = set.iterator();
int doc = DocIdSetIterator.NO_MORE_DOCS;
while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
if (!bits.get(doc)) {
set.fastClear(doc);
int doc = it.nextDoc();
for (AtomicReaderContext context : reader.leaves()) {
AtomicReader r = context.reader();
final int maxDoc = r.maxDoc() + context.docBase;
if (doc >= maxDoc) { // skip this segment
continue;
}
if (!r.hasDeletions()) { // skip all docs that belong to this reader as it has no deletions
while ((doc = it.nextDoc()) < maxDoc) {}
continue;
}
Bits liveDocs = r.getLiveDocs();
do {
if (!liveDocs.get(doc - context.docBase)) {
set.clear(doc);
}
} while ((doc = it.nextDoc()) < maxDoc);
}
}
@ -274,8 +284,7 @@ public class ScoredDocIdsUtils {
if (target <= next) {
target = next + 1;
}
return next = target >= maxDoc ? NO_MORE_DOCS
: target;
return next = target >= maxDoc ? NO_MORE_DOCS : target;
}
@Override
@ -420,4 +429,5 @@ public class ScoredDocIdsUtils {
}
}
}
}

View File

@ -317,8 +317,7 @@ public abstract class FacetTestBase extends LuceneTestCase {
}
/** Validate results equality */
protected static void assertSameResults(List<FacetResult> expected,
List<FacetResult> actual) {
protected static void assertSameResults(List<FacetResult> expected, List<FacetResult> actual) {
String expectedResults = resStringValueOnly(expected);
String actualResults = resStringValueOnly(actual);
if (!expectedResults.equals(actualResults)) {

View File

@ -29,12 +29,11 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
public class AdaptiveAccumulatorTest extends BaseSampleTestTopK {
@Override
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler,
TaxonomyReader taxoReader, IndexReader indexReader,
FacetSearchParams searchParams) {
AdaptiveFacetsAccumulator res = new AdaptiveFacetsAccumulator(searchParams,
indexReader, taxoReader);
protected FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
IndexReader indexReader, FacetSearchParams searchParams) {
AdaptiveFacetsAccumulator res = new AdaptiveFacetsAccumulator(searchParams, indexReader, taxoReader);
res.setSampler(sampler);
return res;
}
}

View File

@ -14,6 +14,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@ -106,30 +107,31 @@ public class CategoryListIteratorTest extends LuceneTestCase {
IndexReader reader = writer.getReader();
writer.close();
IntsRef ordinals = new IntsRef();
CategoryListIterator cli = new PayloadCategoryListIteraor(reader, new Term("f","1"), encoder.createMatchingDecoder());
cli.init();
int totalCategories = 0;
for (int i = 0; i < data.length; i++) {
IntsRef ordinals = new IntsRef();
CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), encoder.createMatchingDecoder());
for (AtomicReaderContext context : reader.leaves()) {
cli.setNextReader(context);
int maxDoc = context.reader().maxDoc();
int dataIdx = context.docBase;
for (int doc = 0; doc < maxDoc; doc++, dataIdx++) {
Set<Integer> values = new HashSet<Integer>();
for (int j = 0; j < data[i].length; j++) {
values.add(data[i].ints[j]);
for (int j = 0; j < data[dataIdx].length; j++) {
values.add(data[dataIdx].ints[j]);
}
cli.getOrdinals(i, ordinals);
assertTrue("no ordinals for document " + i, ordinals.length > 0);
cli.getOrdinals(doc, ordinals);
assertTrue("no ordinals for document " + doc, ordinals.length > 0);
for (int j = 0; j < ordinals.length; j++) {
assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
}
totalCategories += ordinals.length;
}
}
assertEquals("Missing categories!", 10, totalCategories);
reader.close();
dir.close();
}
/**
* Test that a document with no payloads does not confuse the payload decoder.
*/
@Test
public void testPayloadIteratorWithInvalidDoc() throws Exception {
Directory dir = newDirectory();
@ -160,17 +162,20 @@ public class CategoryListIteratorTest extends LuceneTestCase {
IndexReader reader = writer.getReader();
writer.close();
IntsRef ordinals = new IntsRef();
CategoryListIterator cli = new PayloadCategoryListIteraor(reader, new Term("f","1"), encoder.createMatchingDecoder());
assertTrue("Failed to initialize payload iterator", cli.init());
int totalCategories = 0;
for (int i = 0; i < data.length; i++) {
IntsRef ordinals = new IntsRef();
CategoryListIterator cli = new PayloadCategoryListIteraor(new Term("f","1"), encoder.createMatchingDecoder());
for (AtomicReaderContext context : reader.leaves()) {
cli.setNextReader(context);
int maxDoc = context.reader().maxDoc();
int dataIdx = context.docBase;
for (int doc = 0; doc < maxDoc; doc++, dataIdx++) {
Set<Integer> values = new HashSet<Integer>();
for (int j = 0; j < data[i].length; j++) {
values.add(data[i].ints[j]);
for (int j = 0; j < data[dataIdx].length; j++) {
values.add(data[dataIdx].ints[j]);
}
cli.getOrdinals(i, ordinals);
if (i == 0) {
cli.getOrdinals(doc, ordinals);
if (dataIdx == 0) {
assertTrue("document 0 must have a payload", ordinals.length > 0);
for (int j = 0; j < ordinals.length; j++) {
assertTrue("expected category not found: " + ordinals.ints[j], values.contains(ordinals.ints[j]));
@ -180,6 +185,7 @@ public class CategoryListIteratorTest extends LuceneTestCase {
assertTrue("only document 0 should have a payload", ordinals.length == 0);
}
}
}
assertEquals("Wrong number of total categories!", 2, totalCategories);
reader.close();

View File

@ -22,6 +22,7 @@ import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.index.AtomicReaderContext;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -132,8 +133,8 @@ public class TestCategoryListCache extends FacetTestBase {
}
}
@Override
public boolean init() throws IOException {
return it.init();
public boolean setNextReader(AtomicReaderContext context) throws IOException {
return it.setNextReader(context);
}
};
}

View File

@ -0,0 +1,128 @@
package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.util.AssertingCategoryListIterator;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestStandardFacetsAccumulator extends LuceneTestCase {
private void indexTwoDocs(IndexWriter indexWriter, FacetFields facetFields, boolean withContent) throws Exception {
for (int i = 0; i < 2; i++) {
Document doc = new Document();
if (withContent) {
doc.add(new StringField("f", "a", Store.NO));
}
if (facetFields != null) {
facetFields.addFields(doc, Collections.singletonList(new CategoryPath("A", Integer.toString(i))));
}
indexWriter.addDocument(doc);
}
indexWriter.commit();
}
@Test
public void testSegmentsWithoutCategoriesOrResults() throws Exception {
// tests the accumulator when there are segments with no results
Directory indexDir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwc.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges
IndexWriter indexWriter = new IndexWriter(indexDir, iwc);
FacetIndexingParams fip = new FacetIndexingParams(new CategoryListParams() {
@Override
public CategoryListIterator createCategoryListIterator(int partition) throws IOException {
return new AssertingCategoryListIterator(super.createCategoryListIterator(partition));
}
});
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetFields facetFields = new FacetFields(taxoWriter, fip);
indexTwoDocs(indexWriter, facetFields, false); // 1st segment, no content, with categories
indexTwoDocs(indexWriter, null, true); // 2nd segment, with content, no categories
indexTwoDocs(indexWriter, facetFields, true); // 3rd segment ok
indexTwoDocs(indexWriter, null, false); // 4th segment, no content, or categories
indexTwoDocs(indexWriter, null, true); // 5th segment, with content, no categories
indexTwoDocs(indexWriter, facetFields, true); // 6th segment, with content, with categories
IOUtils.close(indexWriter, taxoWriter);
DirectoryReader indexReader = DirectoryReader.open(indexDir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher indexSearcher = new IndexSearcher(indexReader);
// search for "f:a", only segments 1 and 3 should match results
Query q = new TermQuery(new Term("f", "a"));
ArrayList<FacetRequest> requests = new ArrayList<FacetRequest>(1);
CountFacetRequest countNoComplements = new CountFacetRequest(new CategoryPath("A"), 10) {
@Override
public boolean supportsComplements() {
return false; // disable complements
}
};
requests.add(countNoComplements);
FacetSearchParams fsp = new FacetSearchParams(requests, fip);
FacetsCollector fc = new FacetsCollector(fsp , indexReader, taxoReader);
indexSearcher.search(q, fc);
List<FacetResult> results = fc.getFacetResults();
assertEquals("received too many facet results", 1, results.size());
FacetResultNode frn = results.get(0).getFacetResultNode();
assertEquals("wrong weight for \"A\"", 4, (int) frn.getValue());
assertEquals("wrong number of children", 2, frn.getNumSubResults());
for (FacetResultNode node : frn.getSubResults()) {
assertEquals("wrong weight for child " + node.getLabel(), 2, (int) node.getValue());
}
IOUtils.close(indexReader, taxoReader);
IOUtils.close(indexDir, taxoDir);
}
}

View File

@ -17,6 +17,7 @@ import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.util.MultiCategoryListIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
@ -100,13 +101,14 @@ public class MultiCategoryListIteratorTest extends LuceneTestCase {
clCache.loadAndRegister(clp, indexReader, taxoReader, indexingParams);
iterators[i] = clCache.get(clp).iterator(0); // no partitions
} else {
iterators[i] = new PayloadCategoryListIteraor(indexReader, clp.getTerm(), decoder);
iterators[i] = new PayloadCategoryListIteraor(clp.getTerm(), decoder);
}
}
MultiCategoryListIterator cli = new MultiCategoryListIterator(iterators);
assertTrue("failed to init multi-iterator", cli.init());
for (AtomicReaderContext context : indexReader.leaves()) {
assertTrue("failed to init multi-iterator", cli.setNextReader(context));
IntsRef ordinals = new IntsRef();
int maxDoc = indexReader.maxDoc();
final int maxDoc = context.reader().maxDoc();
for (int i = 0; i < maxDoc; i++) {
cli.getOrdinals(i, ordinals);
assertTrue("document " + i + " does not have categories", ordinals.length > 0);
@ -114,7 +116,9 @@ public class MultiCategoryListIteratorTest extends LuceneTestCase {
CategoryPath cp = taxoReader.getPath(ordinals.ints[j]);
assertNotNull("ordinal " + ordinals.ints[j] + " not found in taxonomy", cp);
if (cp.length == 2) {
assertEquals("invalid category for document " + i, i, Integer.parseInt(cp.components[1]));
int globalDoc = i + context.docBase;
assertEquals("invalid category for document " + globalDoc, globalDoc, Integer.parseInt(cp.components[1]));
}
}
}
}

View File

@ -59,9 +59,8 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
return res;
}
protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler,
TaxonomyReader taxoReader, IndexReader indexReader,
FacetSearchParams searchParams);
protected abstract FacetsAccumulator getSamplingAccumulator(Sampler sampler, TaxonomyReader taxoReader,
IndexReader indexReader, FacetSearchParams searchParams);
/**
* Try out faceted search with sampling enabled and complements either disabled or enforced
@ -119,14 +118,11 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
assertSameResults(expected, sampledResults);
}
private FacetsCollector samplingCollector(
final boolean complement,
final Sampler sampler,
private FacetsCollector samplingCollector(final boolean complement, final Sampler sampler,
FacetSearchParams samplingSearchParams) {
FacetsCollector samplingFC = new FacetsCollector(samplingSearchParams, indexReader, taxoReader) {
@Override
protected FacetsAccumulator initFacetsAccumulator(
FacetSearchParams facetSearchParams, IndexReader indexReader,
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader,
TaxonomyReader taxonomyReader) {
FacetsAccumulator acc = getSamplingAccumulator(sampler, taxonomyReader, indexReader, facetSearchParams);
acc.setComplementThreshold(complement ? FacetsAccumulator.FORCE_COMPLEMENT : FacetsAccumulator.DISABLE_COMPLEMENT);
@ -144,12 +140,13 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
samplingParams.setMinSampleSize((int) (100 * retryFactor));
samplingParams.setMaxSampleSize((int) (10000 * retryFactor));
samplingParams.setOversampleFactor(5.0 * retryFactor);
samplingParams.setSamplingThreshold(11000); //force sampling
Sampler sampler = useRandomSampler ?
new RandomSampler(samplingParams, new Random(random().nextLong())) :
new RepeatableSampler(samplingParams);
assertTrue("must enable sampling for this test!",sampler.shouldSample(scoredDocIDs));
return sampler;
}
}

View File

@ -0,0 +1,65 @@
package org.apache.lucene.facet.util;
import java.io.IOException;
import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.util.IntsRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link CategoryListIterator} which asserts that
* {@link #getOrdinals(int, IntsRef)} is not called before
* {@link #setNextReader(AtomicReaderContext)} and that if
* {@link #setNextReader(AtomicReaderContext)} returns false,
* {@link #getOrdinals(int, IntsRef)} isn't called.
*/
public class AssertingCategoryListIterator implements CategoryListIterator {
private final CategoryListIterator delegate;
private boolean setNextReaderCalled = false;
private boolean validSegment = false;
private int maxDoc;
public AssertingCategoryListIterator(CategoryListIterator delegate) {
this.delegate = delegate;
}
@Override
public boolean setNextReader(AtomicReaderContext context) throws IOException {
setNextReaderCalled = true;
maxDoc = context.reader().maxDoc();
return validSegment = delegate.setNextReader(context);
}
@Override
public void getOrdinals(int docID, IntsRef ints) throws IOException {
if (!setNextReaderCalled) {
throw new RuntimeException("should not call getOrdinals without setNextReader first");
}
if (!validSegment) {
throw new RuntimeException("should not call getOrdinals if setNextReader returned false");
}
if (docID >= maxDoc) {
throw new RuntimeException("docID is larger than current maxDoc; forgot to call setNextReader?");
}
delegate.getOrdinals(docID, ints);
}
}

View File

@ -9,6 +9,9 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
import org.apache.lucene.facet.search.ScoredDocIdCollector;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
@ -21,14 +24,9 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.OpenBitSetDISI;
import org.junit.Test;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIDsIterator;
import org.apache.lucene.facet.search.ScoredDocIdCollector;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -52,21 +50,21 @@ public class TestScoredDocIDsUtils extends LuceneTestCase {
@Test
public void testComplementIterator() throws Exception {
final int n = atLeast(10000);
final OpenBitSet bits = new OpenBitSet(n);
for (int i = 0; i < 5 * n; i++) {
bits.flip(random().nextInt(n));
final FixedBitSet bits = new FixedBitSet(n);
Random random = random();
for (int i = 0; i < n; i++) {
int idx = random.nextInt(n);
bits.flip(idx, idx + 1);
}
OpenBitSet verify = new OpenBitSet(n);
verify.or(bits);
FixedBitSet verify = new FixedBitSet(bits);
ScoredDocIDs scoredDocIDs = ScoredDocIdsUtils.createScoredDocIds(bits, n);
Directory dir = newDirectory();
IndexReader reader = createReaderWithNDocs(random(), n, dir);
IndexReader reader = createReaderWithNDocs(random, n, dir);
try {
assertEquals(n - verify.cardinality(), ScoredDocIdsUtils.getComplementSet(scoredDocIDs,
reader).size());
assertEquals(n - verify.cardinality(), ScoredDocIdsUtils.getComplementSet(scoredDocIDs, reader).size());
} finally {
reader.close();
dir.close();
@ -147,7 +145,7 @@ public class TestScoredDocIDsUtils extends LuceneTestCase {
searcher.search(q, collector);
ScoredDocIDs scoredDocIds = collector.getScoredDocIDs();
OpenBitSet resultSet = new OpenBitSetDISI(scoredDocIds.getDocIDs().iterator(), reader.maxDoc());
FixedBitSet resultSet = (FixedBitSet) scoredDocIds.getDocIDs();
// Getting the complement set of the query result
ScoredDocIDs complementSet = ScoredDocIdsUtils.getComplementSet(scoredDocIds, reader);
@ -164,12 +162,11 @@ public class TestScoredDocIDsUtils extends LuceneTestCase {
assertFalse(
"Complement-Set must not contain deleted documents (doc="+docNum+")",
live != null && !live.get(docNum));
assertNull(
"Complement-Set must not contain docs from the original set (doc="+ docNum+")",
assertNull("Complement-Set must not contain docs from the original set (doc="+ docNum+")",
reader.document(docNum).getField("del"));
assertFalse(
"Complement-Set must not contain docs from the original set (doc="+docNum+")",
resultSet.fastGet(docNum));
resultSet.get(docNum));
}
} finally {
reader.close();

View File

@ -24,6 +24,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.*;
import org.apache.lucene.util.packed.PackedInts;
/**
* Finite state automata based implementation of "autocomplete" functionality.
@ -237,7 +238,8 @@ public class FSTCompletionBuilder {
final Object empty = outputs.getNoOutput();
final Builder<Object> builder = new Builder<Object>(
FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
shareMaxTailLength, outputs, null, false, true);
shareMaxTailLength, outputs, null, false,
PackedInts.DEFAULT, true, 15);
BytesRef scratch = new BytesRef();
BytesRef entry;
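
For reference, here is the same Builder invocation from the hunk above with each positional argument annotated. The labels are informal readings inferred from this commit's hunks (the FSTTester change below passes willRewrite in the same slot as the false here), not authoritative javadoc, and the snippet annotates the existing call rather than standing on its own:

final Builder<Object> builder = new Builder<Object>(
    FST.INPUT_TYPE.BYTE1,   // input type: one byte per input symbol
    0, 0,                   // minimum suffix counts
    true, true,             // suffix sharing / non-singleton node sharing
    shareMaxTailLength,     // longest shared tail
    outputs,                // outputs implementation (no-output objects here)
    null,                   // no custom freeze-tail hook
    false,                  // do not pack the FST
    PackedInts.DEFAULT,     // acceptable overhead ratio for packed ints (new argument)
    true,                   // allow array-encoded arcs
    15);                    // log2 of the internal byte page size (new argument)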

View File

@ -40,6 +40,7 @@ import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.packed.PackedInts;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@ -288,7 +289,16 @@ public class FSTTester<T> {
outputs,
null,
willRewrite,
true);
PackedInts.DEFAULT,
true,
15);
if (LuceneTestCase.VERBOSE) {
if (willRewrite) {
System.out.println("TEST: packed FST");
} else {
System.out.println("TEST: non-packed FST");
}
}
for(InputOutput<T> pair : pairs) {
if (pair.output instanceof List) {

View File

@ -41,8 +41,6 @@ Detailed Change List
Other Changes
----------------------
* SOLR-3735: Relocate the example mime-to-extension mapping, and
upgrade Velocity Engine to 1.7 (ehatcher)
================== 4.1.0 ==================
@ -50,14 +48,14 @@ Versions of Major Components
---------------------
Apache Tika 1.2
Carrot2 3.6.2
Velocity 1.6.4 and Velocity Tools 2.0
Velocity 1.7 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.4.5
Upgrading from Solr 4.0.0
----------------------
Custom java parsing plugins need to migrade from throwing the internal
Custom java parsing plugins need to migrate from throwing the internal
ParseException to throwing SyntaxError.
BaseDistributedSearchTestCase now randomizes the servlet context it uses when
@ -150,7 +148,7 @@ New Features
CoreAdmin API the same way as the data directory. (Mark Miller)
* SOLR-4028: When using ZK chroot, it would be nice if Solr would create the
initial path when it doesn't exist. (Tomas Fernandez Lobbe via Mark Miller)
initial path when it doesn't exist. (Tomás Fernández Löbbe via Mark Miller)
* SOLR-3948: Calculate/display deleted documents in admin interface.
(Shawn Heisey via Mark Miller)
@ -209,6 +207,9 @@ New Features
* SOLR-2201: DIH's "formatDate" function now supports a timezone as an optional
fourth parameter (James Dyer, Mark Waddle)
* SOLR-4302: New parameter 'indexInfo' (defaults to true) in CoreAdmin STATUS
command can be used to omit index specific information (Shahar Davidson via shalin)
Optimizations
----------------------
@ -226,7 +227,7 @@ Optimizations
dynamicField's (steffkes)
* SOLR-3941: The "commitOnLeader" part of distributed recovery can use
openSearcher=false. (Tomas Fernandez Lobbe via Mark Miller)
openSearcher=false. (Tomás Fernández Löbbe via Mark Miller)
* SOLR-4063: Allow CoreContainer to load multiple SolrCores in parallel rather
than just serially. (Mark Miller)
@ -250,6 +251,10 @@ Optimizations
* SOLR-3982: Admin UI: Various Dataimport Improvements (steffkes)
* SOLR-4296: Admin UI: Improve Dataimport Auto-Refresh (steffkes)
* SOLR-3458: Allow multiple Items to stay open on Plugins-Page (steffkes)
Bug Fixes
----------------------
@ -362,7 +367,7 @@ Bug Fixes
* SOLR-4081: QueryParsing.toString, used during debugQuery=true, did not
correctly handle ExtendedQueries such as WrappedQuery
(used when cache=false), spatial queries, and frange queires.
(used when cache=false), spatial queries, and frange queries.
(Eirik Lygre, yonik)
* SOLR-3959: Ensure the internal comma separator of poly fields is escaped
@ -403,7 +408,7 @@ Bug Fixes
* SOLR-4162: ZkCli usage examples are not correct because the zkhost parameter
is not present and it is mandatory for all commands.
(Tomas Fernandez Lobbe via Mark Miller)
(Tomás Fernández Löbbe via Mark Miller)
* SOLR-4071: Validate that name is pass to Collections API create, and behave the
same way as on startup when collection.configName is not explicitly passed.
@ -495,7 +500,7 @@ Bug Fixes
* SOLR-4279: Wrong exception message if _version_ field is multivalued (shalin)
* SOLR-4170: The 'backup' ReplicationHandler command can sometimes use a stale
index directory rather than the current one. (Mark Miller, Marcin Rzewuck)
index directory rather than the current one. (Mark Miller, Marcin Rzewucki)
* SOLR-3876: Solr Admin UI is completely dysfunctional on IE 9 (steffkes)
@ -503,6 +508,17 @@ Bug Fixes
import works fine with SolrCloud clusters (Deniz Durmus, James Dyer,
Erick Erickson, shalin)
* SOLR-4291: Harden the Overseer work queue thread loop. (Mark Miller)
* SOLR-3820: Solr Admin Query form is missing some edismax request parameters
(steffkes)
* SOLR-4217: post.jar no longer ignores -Dparams when -Durl is used.
(Alexandre Rafalovitch, ehatcher)
* SOLR-4303: On replication, if the generation of the master is lower than the
slave we need to force a full copy of the index. (Mark Miller, Gregg Donovan)
Other Changes
----------------------
@ -580,6 +596,16 @@ Other Changes
* SOLR-4208: ExtendedDismaxQParserPlugin has been refactored to make
subclassing easier. (Tomás Fernández Löbbe, hossman)
* SOLR-3735: Relocate the example mime-to-extension mapping, and
upgrade Velocity Engine to 1.7 (ehatcher)
* SOLR-4287: Removed "apache-" prefix from Solr distribution and artifact
filenames. (Ryan Ernst, Robert Muir, Steve Rowe)
* SOLR-4016: Deduplication does not work with atomic/partial updates so
disallow atomic update requests which change signature generating fields.
(Joel Nothman, yonik, shalin)
================== 4.0.0 ==================
Versions of Major Components
@ -862,7 +888,7 @@ Bug Fixes
* SOLR-3527: SolrCmdDistributor drops some of the important commit attributes
(maxOptimizeSegments, softCommit, expungeDeletes) when sending a commit to
replicas. (Andy Laird, Tomas Fernandez Lobbe, Mark Miller)
replicas. (Andy Laird, Tomás Fernández Löbbe, Mark Miller)
* SOLR-3844: SolrCore reload can fail because it tries to remove the index
write lock while already holding it. (Mark Miller)
@ -1273,7 +1299,7 @@ New Features
* SOLR-571: The autowarmCount for LRUCaches (LRUCache and FastLRUCache) now
supports "percentages" which get evaluated relative the current size of
the cache when warming happens.
(Tomas Fernandez Lobbe and hossman)
(Tomás Fernández Löbbe and hossman)
* SOLR-1932: New relevancy function queries: termfreq, tf, docfreq, idf
norm, maxdoc, numdocs. (yonik)
@ -1644,12 +1670,12 @@ Bug Fixes
down to it via acceptDocs since LUCENE-1536. (Mike Hugo, yonik)
* SOLR-3214: If you use multiple fl entries rather than a comma separated list, all but the first
entry can be ignored if you are using distributed search. (Tomas Fernandez Lobbe via Mark Miller)
entry can be ignored if you are using distributed search. (Tomás Fernández Löbbe via Mark Miller)
* SOLR-3352: eDismax: pf2 should kick in for a query with 2 terms (janhoy)
* SOLR-3361: ReplicationHandler "maxNumberOfBackups" doesn't work if backups are triggered on commit
(James Dyer, Tomas Fernandez Lobbe)
(James Dyer, Tomás Fernández Löbbe)
* SOLR-2605: fixed tracking of the 'defaultCoreName' in CoreContainer so that
CoreAdminHandler could return consistent information regardless of wether
@ -1865,6 +1891,16 @@ Documentation
* SOLR-2232: Improved README info on solr.solr.home in examples
(Eric Pugh and hossman)
================== 3.6.2 ==================
Bug Fixes
----------------------
* SOLR-3790: ConcurrentModificationException could be thrown when using hl.fl=*.
(yonik, koji)
* SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token.
(Tom Burton-West, Robert Muir)
================== 3.6.1 ==================
More information about this release, including any errata related to the
release notes, upgrade instructions, or other changes may be found online at:
@ -1877,7 +1913,7 @@ Bug Fixes
(Uwe Schindler, Mike McCandless, Robert Muir)
* SOLR-3361: ReplicationHandler "maxNumberOfBackups" doesn't work if backups are triggered on commit
(James Dyer, Tomas Fernandez Lobbe)
(James Dyer, Tomás Fernández Löbbe)
* SOLR-3375: Fix charset problems with HttpSolrServer (Roger Håkansson, yonik, siren)

View File

@ -45,11 +45,11 @@ example/
Please see example/README.txt for information about running this
example.
dist/apache-solr-XX.war
dist/solr-XX.war
The Apache Solr Application. Deploy this WAR file to any servlet
container to run Apache Solr.
dist/apache-solr-<component>-XX.jar
dist/solr-<component>-XX.jar
The Apache Solr libraries. To compile Apache Solr Plugins,
one or more of these will be required. The core library is
required at a minimum. (see http://wiki.apache.org/solr/SolrPlugins

View File

@ -25,7 +25,7 @@
<property name="Name" value="Solr" />
<property name="version" value="5.0-SNAPSHOT"/>
<property name="fullname" value="apache-${ant.project.name}"/>
<property name="fullname" value="${ant.project.name}"/>
<property name="fullnamever" value="${fullname}-${version}"/>
<property name="final.name" value="${fullnamever}"/>
@ -114,7 +114,7 @@
<attribute name="property" default="@{name}.uptodate"/>
<attribute name="classpath.property" default="@{name}.jar"/>
<!-- set jarfile only, if the target jar file has no generic name -->
<attribute name="jarfile" default="${common-solr.dir}/build/contrib/solr-@{name}/apache-solr-@{name}-${version}.jar"/>
<attribute name="jarfile" default="${common-solr.dir}/build/contrib/solr-@{name}/solr-@{name}-${version}.jar"/>
<sequential>
<!--<echo message="Checking '@{jarfile}' against source folder '${common.dir}/contrib/@{name}/src/java'"/>-->
<property name="@{classpath.property}" location="@{jarfile}"/>
@ -214,13 +214,13 @@
</target>
<target name="check-solr-core-javadocs-uptodate" unless="solr-core-javadocs.uptodate">
<uptodate property="solr-core-javadocs.uptodate" targetfile="${build.dir}/solr-core/apache-solr-core-${version}-javadoc.jar">
<uptodate property="solr-core-javadocs.uptodate" targetfile="${build.dir}/solr-core/solr-core-${version}-javadoc.jar">
<srcfiles dir="${common-solr.dir}/core/src/java" includes="**/*.java"/>
</uptodate>
</target>
<target name="check-solrj-javadocs-uptodate" unless="solrj-javadocs.uptodate">
<uptodate property="solrj-javadocs.uptodate" targetfile="${build.dir}/solr-solrj/apache-solr-solrj-${version}-javadoc.jar">
<uptodate property="solrj-javadocs.uptodate" targetfile="${build.dir}/solr-solrj/solr-solrj-${version}-javadoc.jar">
<srcfiles dir="${common-solr.dir}/solrj/src/java" includes="**/*.java"/>
</uptodate>
</target>

View File

@ -19,7 +19,7 @@ To start using Solr UIMA Metadata Extraction Library you should go through the f
<lib dir="../../contrib/uima/lib" />
<lib dir="../../contrib/uima/lucene-libs" />
<lib dir="../../dist/" regex="apache-solr-uima-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-uima-\d.*\.jar" />
2. modify your schema.xml adding the fields you want to be hold metadata specifying proper values for type, indexed, stored and multiValued options:

View File

@ -44,8 +44,8 @@
in that directory which completely match the regex (anchored on both
ends) will be included.
-->
<lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
<lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-clustering-\d.*\.jar" />
<!--
If a dir option (with or without a regex) is used and nothing is
found that matches, it will be ignored

View File

@ -44,8 +44,8 @@
in that directory which completely match the regex (anchored on both
ends) will be included.
-->
<lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" />
<lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-clustering-\d.*\.jar" />
<!--
If a dir option (with or without a regex) is used and nothing is
found that matches, it will be ignored

View File

@ -24,7 +24,7 @@
<luceneMatchVersion>${tests.luceneMatchVersion:LUCENE_CURRENT}</luceneMatchVersion>
<lib dir="../../contrib/velocity/lib" />
<lib dir="../../dist/" regex="apache-solr-velocity-\d.*\.jar" />
<lib dir="../../dist/" regex="solr-velocity-\d.*\.jar" />
<dataDir>${solr.data.dir:}</dataDir>

View File

@ -22,14 +22,12 @@ import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.ClosableThread;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.cloud.DocCollection;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.DocRouter;
import org.apache.solr.common.cloud.ImplicitDocRouter;
import org.apache.solr.common.cloud.Replica;
import org.apache.solr.common.cloud.Slice;
@ -37,7 +35,6 @@ import org.apache.solr.common.cloud.SolrZkClient;
import org.apache.solr.common.cloud.ZkCoreNodeProps;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.cloud.ZooKeeperException;
import org.apache.solr.handler.component.ShardHandler;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
@ -81,7 +78,9 @@ public class Overseer {
if (!this.isClosed && amILeader()) {
// see if there's something left from the previous Overseer and re
// process all events that were not persisted into cloud state
synchronized (reader.getUpdateLock()) { //XXX this only protects against edits inside single node
synchronized (reader.getUpdateLock()) { // XXX this only protects
// against edits inside single
// node
try {
byte[] head = workQueue.peek();
@ -92,8 +91,7 @@ public class Overseer {
while (head != null && amILeader()) {
final ZkNodeProps message = ZkNodeProps.load(head);
final String operation = message
.getStr(QUEUE_OPERATION);
final String operation = message.getStr(QUEUE_OPERATION);
clusterState = processMessage(clusterState, message, operation);
zkClient.setData(ZkStateReader.CLUSTER_STATE,
ZkStateReader.toJSON(clusterState), true);
@ -104,19 +102,20 @@ public class Overseer {
}
}
} catch (KeeperException e) {
if (e.code() == KeeperException.Code.SESSIONEXPIRED
|| e.code() == KeeperException.Code.CONNECTIONLOSS) {
log.warn("Solr cannot talk to ZK");
if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
log.warn("Solr cannot talk to ZK, exiting Overseer work queue loop", e);
return;
}
SolrException.log(log, "", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
log.error("Exception in Overseer work queue loop", e);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return;
} catch (Exception e) {
log.error("Exception in Overseer work queue loop", e);
}
}
}
log.info("Starting to work on the main queue");
@ -146,17 +145,17 @@ public class Overseer {
while (workQueue.poll() != null);
} catch (KeeperException e) {
if (e.code() == KeeperException.Code.SESSIONEXPIRED
|| e.code() == KeeperException.Code.CONNECTIONLOSS) {
log.warn("Overseer cannot talk to ZK");
if (e.code() == KeeperException.Code.SESSIONEXPIRED) {
log.warn("Solr cannot talk to ZK, exiting Overseer main queue loop", e);
return;
}
SolrException.log(log, "", e);
throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR,
"", e);
log.error("Exception in Overseer main queue loop", e);
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
return;
} catch (Exception e) {
log.error("Exception in Overseer main queue loop", e);
}
}

View File

@ -40,7 +40,7 @@ import org.slf4j.LoggerFactory;
/**
* A {@link DirectoryFactory} impl base class for caching Directory instances
* per path. Most DirectoryFactory implementations will want to extend this
* class and simply implement {@link DirectoryFactory#create(String)}.
* class and simply implement {@link DirectoryFactory#create(String, DirContext)}.
*
*/
public abstract class CachingDirectoryFactory extends DirectoryFactory {
@ -202,7 +202,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
}
@Override
protected abstract Directory create(String path) throws IOException;
protected abstract Directory create(String path, DirContext dirContext) throws IOException;
@Override
public boolean exists(String path) {
@ -218,9 +218,9 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
* java.lang.String)
*/
@Override
public final Directory get(String path, String rawLockType)
public final Directory get(String path, DirContext dirContext, String rawLockType)
throws IOException {
return get(path, rawLockType, false);
return get(path, dirContext, rawLockType, false);
}
/*
@ -230,7 +230,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
* java.lang.String, boolean)
*/
@Override
public final Directory get(String path, String rawLockType, boolean forceNew)
public final Directory get(String path, DirContext dirContext, String rawLockType, boolean forceNew)
throws IOException {
String fullPath = new File(path).getAbsolutePath();
synchronized (this) {
@ -264,7 +264,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
}
if (directory == null || forceNew) {
directory = create(fullPath);
directory = create(fullPath, dirContext);
directory = rateLimit(directory);
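
The updated javadoc above says most implementations should extend this class and implement the new create(String, DirContext) signature. Below is a minimal sketch of such a factory, mirroring the stock factories changed later in this commit; the class name is made up and, like those factories, it ignores the DirContext hint:

import java.io.File;
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.StandardDirectoryFactory;

// Hypothetical example factory, not part of the patch.
public class ExampleNIOFSDirectoryFactory extends StandardDirectoryFactory {
  @Override
  protected Directory create(String path, DirContext dirContext) throws IOException {
    // dirContext is only a hint (DEFAULT vs META_DATA); a plain filesystem
    // factory can ignore it and always open the path the same way.
    return new NIOFSDirectory(new File(path));
  }
}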

View File

@ -1626,10 +1626,15 @@ public class CoreContainer
return schema;
}
private static final String DEF_SOLR_XML ="<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" +
"<solr persistent=\"false\">\n" +
" <cores adminPath=\"/admin/cores\" defaultCoreName=\"" + DEFAULT_DEFAULT_CORE_NAME + "\">\n" +
" <core name=\""+ DEFAULT_DEFAULT_CORE_NAME + "\" shard=\"${shard:}\" instanceDir=\"collection1\" />\n" +
" </cores>\n" +
"</solr>";
private static final String DEF_SOLR_XML = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
+ "<solr persistent=\"false\">\n"
+ " <cores adminPath=\"/admin/cores\" defaultCoreName=\""
+ DEFAULT_DEFAULT_CORE_NAME
+ "\""
+ " host=\"${host:}\" hostPort=\"${hostPort:}\" hostContext=\"${hostContext:}\" zkClientTimeout=\"${zkClientTimeout:15000}\""
+ ">\n"
+ " <core name=\""
+ DEFAULT_DEFAULT_CORE_NAME
+ "\" shard=\"${shard:}\" collection=\"${collection:}\" instanceDir=\"collection1\" />\n"
+ " </cores>\n" + "</solr>";
}

View File

@ -41,6 +41,8 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
// A large estimate should currently have no other side effects.
public static final IOContext IOCONTEXT_NO_CACHE = new IOContext(new FlushInfo(10*1000*1000, 100L*1000*1000*1000));
// hint about what the directory contains - default is index directory
public enum DirContext {DEFAULT, META_DATA}
private static final Logger log = LoggerFactory.getLogger(DirectoryFactory.class.getName());
@ -71,7 +73,7 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
*
* @throws IOException If there is a low-level I/O error.
*/
protected abstract Directory create(String path) throws IOException;
protected abstract Directory create(String path, DirContext dirContext) throws IOException;
/**
* Returns true if a Directory exists for a given path.
@ -118,7 +120,7 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
*
* @throws IOException If there is a low-level I/O error.
*/
public abstract Directory get(String path, String rawLockType)
public abstract Directory get(String path, DirContext dirContext, String rawLockType)
throws IOException;
/**
@ -130,7 +132,7 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
*
* @throws IOException If there is a low-level I/O error.
*/
public abstract Directory get(String path, String rawLockType,
public abstract Directory get(String path, DirContext dirContext, String rawLockType,
boolean forceNew) throws IOException;
/**

View File

@ -22,6 +22,7 @@ import org.apache.lucene.store.LockFactory; // javadocs
import org.apache.lucene.store.MMapDirectory;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -56,7 +57,7 @@ public class MMapDirectoryFactory extends StandardDirectoryFactory {
}
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
MMapDirectory mapDirectory = new MMapDirectory(new File(path), null, maxChunk);
try {
mapDirectory.setUseUnmap(unmapHack);

View File

@ -18,6 +18,7 @@ package org.apache.solr.core;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import java.io.File;
import java.io.IOException;
@ -30,7 +31,7 @@ import java.io.IOException;
public class NIOFSDirectoryFactory extends StandardDirectoryFactory {
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
return new NIOFSDirectory(new File(path));
}

View File

@ -25,6 +25,7 @@ import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NRTCachingDirectory;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory.DirContext;
/**
* Factory to instantiate {@link org.apache.lucene.store.NRTCachingDirectory}
@ -48,7 +49,7 @@ public class NRTCachingDirectoryFactory extends StandardDirectoryFactory {
}
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
return new NRTCachingDirectory(FSDirectory.open(new File(path)), maxMergeSizeMB, maxCachedMB);
}

View File

@ -28,7 +28,7 @@ import org.apache.lucene.store.RAMDirectory;
public class RAMDirectoryFactory extends EphemeralDirectoryFactory {
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
return new RAMDirectory();
}

View File

@ -18,6 +18,7 @@ package org.apache.solr.core;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import java.io.File;
import java.io.IOException;
@ -30,7 +31,7 @@ import java.io.IOException;
public class SimpleFSDirectoryFactory extends StandardDirectoryFactory {
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
return new SimpleFSDirectory(new File(path));
}

View File

@ -69,6 +69,7 @@ import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.handler.SnapPuller;
import org.apache.solr.handler.admin.ShowFileRequestHandler;
import org.apache.solr.handler.component.DebugComponent;
@ -237,7 +238,7 @@ public final class SolrCore implements SolrInfoMBean {
Properties p = new Properties();
Directory dir = null;
try {
dir = getDirectoryFactory().get(getDataDir(), getSolrConfig().indexConfig.lockType);
dir = getDirectoryFactory().get(getDataDir(), DirContext.META_DATA, getSolrConfig().indexConfig.lockType);
if (dir.fileExists(SnapPuller.INDEX_PROPERTIES)){
final IndexInput input = dir.openInput(SnapPuller.INDEX_PROPERTIES, IOContext.DEFAULT);
@ -454,7 +455,7 @@ public final class SolrCore implements SolrInfoMBean {
if (indexExists && firstTime && !reload) {
Directory dir = directoryFactory.get(indexDir,
Directory dir = directoryFactory.get(indexDir, DirContext.DEFAULT,
getSolrConfig().indexConfig.lockType);
try {
if (IndexWriter.isLocked(dir)) {

View File

@ -35,7 +35,7 @@ import org.apache.lucene.store.IOContext;
public class StandardDirectoryFactory extends CachingDirectoryFactory {
@Override
protected Directory create(String path) throws IOException {
protected Directory create(String path, DirContext dirContext) throws IOException {
return FSDirectory.open(new File(path));
}

View File

@ -60,6 +60,7 @@ import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrDeletionPolicy;
import org.apache.solr.core.SolrEventListener;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.BinaryQueryResponseWriter;
import org.apache.solr.response.SolrQueryResponse;
@ -361,7 +362,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
// use a set to workaround possible Lucene bug which returns same file
// name multiple times
Collection<String> files = new HashSet<String>(commit.getFileNames());
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), core.getSolrConfig().indexConfig.lockType);
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
try {
for (String fileName : files) {
@ -467,7 +468,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
Directory dir;
long size = 0;
try {
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), core.getSolrConfig().indexConfig.lockType);
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
try {
size = DirectoryFactory.sizeOfDirectory(dir);
} finally {

View File

@ -86,6 +86,7 @@ import org.apache.solr.common.util.ExecutorUtil;
import org.apache.solr.common.util.FastInputStream;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.CachingDirectoryFactory.CloseListener;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.IndexDeletionPolicyWrapper;
import org.apache.solr.core.SolrCore;
@ -369,16 +370,18 @@ public class SnapPuller {
filesDownloaded = Collections.synchronizedList(new ArrayList<Map<String, Object>>());
// if the generation of the master is older than that of the slave, it means they are not compatible to be copied
// then a new index directory needs to be created and all the files need to be copied
boolean isFullCopyNeeded = IndexDeletionPolicyWrapper.getCommitTimestamp(commit) >= latestVersion || forceReplication;
boolean isFullCopyNeeded = IndexDeletionPolicyWrapper
.getCommitTimestamp(commit) >= latestVersion
|| commit.getGeneration() >= latestGeneration || forceReplication;
String tmpIdxDirName = "index." + new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date());
tmpIndex = createTempindexDir(core, tmpIdxDirName);
tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, core.getSolrConfig().indexConfig.lockType);
tmpIndexDir = core.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
// make sure it's the newest known index dir...
indexDirPath = core.getNewIndexDir();
indexDir = core.getDirectoryFactory().get(indexDirPath, core.getSolrConfig().indexConfig.lockType);
indexDir = core.getDirectoryFactory().get(indexDirPath, DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
Directory oldDirectory = null;
try {
@ -542,7 +545,7 @@ public class SnapPuller {
long replicationTimeTaken = (replicationTime - getReplicationStartTime()) / 1000;
Directory dir = null;
try {
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), solrCore.getSolrConfig().indexConfig.lockType);
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), DirContext.META_DATA, solrCore.getSolrConfig().indexConfig.lockType);
int indexCount = 1, confFilesCount = 1;
if (props.containsKey(TIMES_INDEX_REPLICATED)) {
@ -725,7 +728,7 @@ public class SnapPuller {
String indexDir = solrCore.getIndexDir();
// it's okay to use null for lock factory since we know this dir will exist
Directory dir = solrCore.getDirectoryFactory().get(indexDir, solrCore.getSolrConfig().indexConfig.lockType);
Directory dir = solrCore.getDirectoryFactory().get(indexDir, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
try {
for (Map<String,Object> file : filesToDownload) {
if (!dir.fileExists((String) file.get(NAME)) || downloadCompleteIndex) {
@ -848,7 +851,7 @@ public class SnapPuller {
Properties p = new Properties();
Directory dir = null;
try {
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), solrCore.getSolrConfig().indexConfig.lockType);
dir = solrCore.getDirectoryFactory().get(solrCore.getDataDir(), DirContext.META_DATA, solrCore.getSolrConfig().indexConfig.lockType);
if (dir.fileExists(SnapPuller.INDEX_PROPERTIES)){
final IndexInput input = dir.openInput(SnapPuller.INDEX_PROPERTIES, DirectoryFactory.IOCONTEXT_NO_CACHE);
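
The isFullCopyNeeded assignment in an earlier hunk of this file now also forces a full copy when the slave's generation has caught up with or passed the master's (SOLR-4303). A hypothetical helper, not in the patch and with illustrative parameter names, that spells out the three cases:

// Returns true when incremental replication is not safe and the whole index
// must be fetched from the master.
static boolean isFullCopyNeeded(long slaveCommitTimestamp, long slaveGeneration,
                                long masterVersion, long masterGeneration,
                                boolean forceReplication) {
  boolean slaveNotOlderByVersion = slaveCommitTimestamp >= masterVersion;
  boolean slaveNotOlderByGeneration = slaveGeneration >= masterGeneration;
  return slaveNotOlderByVersion || slaveNotOlderByGeneration || forceReplication;
}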

View File

@ -35,6 +35,7 @@ import org.apache.lucene.store.Lock;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.core.SolrCore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -102,7 +103,7 @@ public class SnapShooter {
Collection<String> files = indexCommit.getFileNames();
FileCopier fileCopier = new FileCopier();
Directory dir = solrCore.getDirectoryFactory().get(solrCore.getNewIndexDir(), solrCore.getSolrConfig().indexConfig.lockType);
Directory dir = solrCore.getDirectoryFactory().get(solrCore.getNewIndexDir(), DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType);
try {
fileCopier.copyFiles(dir, files, snapShotDir);
} finally {

View File

@ -57,6 +57,7 @@ import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
@ -367,7 +368,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
dirsToBeReleased = new Directory[dirNames.length];
DirectoryFactory dirFactory = core.getDirectoryFactory();
for (int i = 0; i < dirNames.length; i++) {
Directory dir = dirFactory.get(dirNames[i], core.getSolrConfig().indexConfig.lockType);
Directory dir = dirFactory.get(dirNames[i], DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
dirsToBeReleased[i] = dir;
// TODO: why doesn't this use the IR factory? what is going on here?
readersToBeClosed[i] = DirectoryReader.open(dir);
@ -688,6 +689,8 @@ public class CoreAdminHandler extends RequestHandlerBase {
SolrParams params = req.getParams();
String cname = params.get(CoreAdminParams.CORE);
String indexInfo = params.get(CoreAdminParams.INDEX_INFO);
boolean isIndexInfoNeeded = Boolean.parseBoolean(null == indexInfo ? "true" : indexInfo);
boolean doPersist = false;
NamedList<Object> status = new SimpleOrderedMap<Object>();
Map<String,Exception> allFailures = coreContainer.getCoreInitFailures();
@ -695,7 +698,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
if (cname == null) {
rsp.add("defaultCoreName", coreContainer.getDefaultCoreName());
for (String name : coreContainer.getCoreNames()) {
status.add(name, getCoreStatus(coreContainer, name));
status.add(name, getCoreStatus(coreContainer, name, isIndexInfoNeeded));
}
rsp.add("initFailures", allFailures);
} else {
@ -703,7 +706,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
? Collections.singletonMap(cname, allFailures.get(cname))
: Collections.emptyMap();
rsp.add("initFailures", failures);
status.add(cname, getCoreStatus(coreContainer, cname));
status.add(cname, getCoreStatus(coreContainer, cname, isIndexInfoNeeded));
}
rsp.add("status", status);
doPersist = false; // no state change
@ -987,7 +990,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
}
protected NamedList<Object> getCoreStatus(CoreContainer cores, String cname) throws IOException {
protected NamedList<Object> getCoreStatus(CoreContainer cores, String cname, boolean isIndexInfoNeeded) throws IOException {
NamedList<Object> info = new SimpleOrderedMap<Object>();
SolrCore core = cores.getCore(cname);
if (core != null) {
@ -1000,6 +1003,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
info.add("schema", core.getSchemaResource());
info.add("startTime", new Date(core.getStartTime()));
info.add("uptime", System.currentTimeMillis() - core.getStartTime());
if (isIndexInfoNeeded) {
RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
try {
SimpleOrderedMap<Object> indexInfo = LukeRequestHandler.getIndexInfo(searcher.get().getIndexReader());
@ -1010,6 +1014,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
} finally {
searcher.decref();
}
}
} finally {
core.close();
}
@ -1022,9 +1027,9 @@ public class CoreAdminHandler extends RequestHandlerBase {
long size = 0;
try {
if (!core.getDirectoryFactory().exists(core.getIndexDir())) {
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), core.getSolrConfig().indexConfig.lockType);
dir = core.getDirectoryFactory().get(core.getNewIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
} else {
dir = core.getDirectoryFactory().get(core.getIndexDir(), core.getSolrConfig().indexConfig.lockType);
dir = core.getDirectoryFactory().get(core.getIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
}
try {
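The STATUS changes above add an optional indexInfo flag so callers can skip the relatively expensive Luke index statistics per core. A hedged usage sketch over plain HTTP, assuming a default local install and that CoreAdminParams.INDEX_INFO maps to the indexInfo request parameter (host, port, and class name are illustrative):

import java.io.InputStream;
import java.net.URL;

class CoreStatusWithoutIndexInfoSketch {
  public static void main(String[] args) throws Exception {
    // action=STATUS for all cores; indexInfo=false skips numDocs/segment details.
    URL url = new URL("http://localhost:8983/solr/admin/cores?action=STATUS&indexInfo=false");
    InputStream in = url.openStream();
    try {
      byte[] buf = new byte[8192];
      for (int n; (n = in.read(buf)) != -1; ) {
        System.out.write(buf, 0, n);
      }
    } finally {
      in.close();
    }
  }
}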

View File

@ -49,6 +49,7 @@ import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
@ -119,7 +120,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
public SolrIndexSearcher(SolrCore core, String path, IndexSchema schema, SolrIndexConfig config, String name, boolean enableCache, DirectoryFactory directoryFactory) throws IOException {
// we don't need to reserve the directory because we get it from the factory
this(core, schema,name, core.getIndexReaderFactory().newReader(directoryFactory.get(path, config.lockType), core), true, enableCache, false, directoryFactory);
this(core, schema,name, core.getIndexReaderFactory().newReader(directoryFactory.get(path, DirContext.DEFAULT, config.lockType), core), true, enableCache, false, directoryFactory);
}
private static String getIndexDir(Directory dir) {

View File

@ -32,6 +32,7 @@ import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.PrintStreamInfoStream;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.apache.solr.schema.IndexSchema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -58,7 +59,7 @@ public class SolrIndexWriter extends IndexWriter {
public static SolrIndexWriter create(String name, String path, DirectoryFactory directoryFactory, boolean create, IndexSchema schema, SolrIndexConfig config, IndexDeletionPolicy delPolicy, Codec codec, boolean forceNewDirectory) throws IOException {
SolrIndexWriter w = null;
final Directory d = directoryFactory.get(path, config.lockType, forceNewDirectory);
final Directory d = directoryFactory.get(path, DirContext.DEFAULT, config.lockType, forceNewDirectory);
try {
w = new SolrIndexWriter(name, path, d, create, schema,
config, delPolicy, codec);

View File

@ -134,7 +134,13 @@ public class SignatureUpdateProcessorFactory
if (enabled) {
SolrInputDocument doc = cmd.getSolrInputDocument();
List<String> currDocSigFields = null;
boolean isPartialUpdate = DistributedUpdateProcessor.isAtomicUpdate(cmd);
if (sigFields == null || sigFields.size() == 0) {
if (isPartialUpdate) {
throw new SolrException
(ErrorCode.SERVER_ERROR,
"Can't use SignatureUpdateProcessor with partial updates on signature fields");
}
Collection<String> docFields = doc.getFieldNames();
currDocSigFields = new ArrayList<String>(docFields.size());
currDocSigFields.addAll(docFields);
@ -149,6 +155,12 @@ public class SignatureUpdateProcessorFactory
for (String field : currDocSigFields) {
SolrInputField f = doc.getField(field);
if (f != null) {
if (isPartialUpdate) {
throw new SolrException
(ErrorCode.SERVER_ERROR,
"Can't use SignatureUpdateProcessor with partial update request " +
"containing signature field: " + field);
}
sig.add(field);
Object o = f.getValue();
if (o instanceof Collection) {
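In short, SignatureUpdateProcessorFactory now fails fast when an atomic (partial) update touches a field that feeds the signature, since the signature cannot be recomputed from a partial document. A hedged sketch of a request the dedupe chain now rejects with a SERVER_ERROR; the field names follow the test config and the new TestPartialUpdateDeduplication further down, and the class name is illustrative:

import java.util.Collections;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;

class RejectedPartialUpdateSketch {
  static UpdateRequest rejectedExample() {
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", "2a");
    // Atomic-update instruction ("set") on v_t, a field the signature is built from:
    doc.addField("v_t", Collections.singletonMap("set", "Hello Dude man!"));
    UpdateRequest req = new UpdateRequest();
    req.add(doc);
    return req; // sending this through the "dedupe" chain now raises SERVER_ERROR
  }
}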

View File

@ -196,7 +196,8 @@ public class SimplePostTool {
fatal("System Property 'data' is not valid for this tool: " + mode);
}
String params = System.getProperty("params", "");
urlStr = System.getProperty("url", SimplePostTool.appendParam(DEFAULT_POST_URL, params));
urlStr = System.getProperty("url", DEFAULT_POST_URL);
urlStr = SimplePostTool.appendParam(urlStr, params);
URL url = new URL(urlStr);
boolean auto = isOn(System.getProperty("auto", DEFAULT_AUTO));
String type = System.getProperty("type");
@ -800,7 +801,7 @@ public class SimplePostTool {
" " + urlc.getResponseMessage() + " for url "+url);
}
} catch (IOException e) {
warn("An error occured posting data to "+url+". Please check that Solr is running.");
warn("An error occurred posting data to "+url+". Please check that Solr is running.");
}
}
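The net effect of the SimplePostTool change above: the params system property is now appended to a user-supplied url instead of only to the built-in default (previously, setting url caused params to be dropped). A hedged sketch mirroring the values used in the updated SimplePostToolTest below; the class name is illustrative:

class SimplePostToolParamsSketch {
  public static void main(String[] args) {
    System.setProperty("params", "param1=foo&param2=bar");
    System.setProperty("url", "http://localhost:5150/solr/update");
    // After SimplePostTool.parseArgsAndInit(args), the effective post URL is
    //   http://localhost:5150/solr/update?param1=foo&param2=bar
    // whereas before this fix the two properties together ignored params.
  }
}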

View File

@ -47,6 +47,29 @@
</updateLog>
</updateHandler>
<updateRequestProcessorChain name="dedupe">
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">true</bool>
<bool name="overwriteDupes">true</bool>
<str name="fields">v_t,t_field</str>
<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<updateRequestProcessorChain name="stored_sig">
<!-- this chain is valid even though the signature field is not
indexed, because we are not asking for dups to be overwritten
-->
<processor class="org.apache.solr.update.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">true</bool>
<str name="signatureField">non_indexed_signature_sS</str>
<bool name="overwriteDupes">false</bool>
<str name="fields">v_t,t_field</str>
<str name="signatureClass">org.apache.solr.update.processor.TextProfileSignature</str>
</processor>
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
</config>

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.core.DirectoryFactory.DirContext;
import org.junit.BeforeClass;
import org.junit.Test;
@ -46,7 +47,7 @@ public class AlternateDirectoryTest extends SolrTestCaseJ4 {
public static volatile Directory dir;
@Override
public Directory create(String path) throws IOException {
public Directory create(String path, DirContext dirContext) throws IOException {
openCalled = true;
return dir = newFSDirectory(new File(path));

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.solr.core.DirectoryFactory.DirContext;
/**
* Test-case for RAMDirectoryFactory
@ -37,13 +38,13 @@ public class RAMDirectoryFactoryTest extends LuceneTestCase {
final Directory directory = new RAMDirectory();
RAMDirectoryFactory factory = new RAMDirectoryFactory() {
@Override
protected Directory create(String path) {
protected Directory create(String path, DirContext dirContext) {
return directory;
}
};
String path = "/fake/path";
Directory dir1 = factory.get(path, null);
Directory dir2 = factory.get(path, null);
Directory dir1 = factory.get(path, DirContext.DEFAULT, null);
Directory dir2 = factory.get(path, DirContext.DEFAULT, null);
assertEquals("RAMDirectoryFactory should not create new instance of RefCntRamDirectory " +
"every time open() is called for the same path", dir1, dir2);
@ -53,7 +54,7 @@ public class RAMDirectoryFactoryTest extends LuceneTestCase {
private void dotestOpenSucceedForEmptyDir() throws IOException {
RAMDirectoryFactory factory = new RAMDirectoryFactory();
Directory dir = factory.get("/fake/path", null);
Directory dir = factory.get("/fake/path", DirContext.DEFAULT, null);
assertNotNull("RAMDirectoryFactory should create RefCntRamDirectory even if the path doen't lead " +
"to index directory on the file system", dir);
factory.release(dir);

View File

@ -64,7 +64,7 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
chain = "dedupe"; // set the default that most tests expect
}
void checkNumDocs(int n) {
static void checkNumDocs(int n) {
SolrQueryRequest req = req();
try {
assertEquals(n, req.getSearcher().getIndexReader().numDocs());
@ -354,6 +354,10 @@ public class SignatureUpdateProcessorFactoryTest extends SolrTestCaseJ4 {
}
private void addDoc(String doc) throws Exception {
addDoc(doc, chain);
}
static void addDoc(String doc, String chain) throws Exception {
Map<String, String[]> params = new HashMap<String, String[]>();
MultiMapSolrParams mmparams = new MultiMapSolrParams(params);
params.put(UpdateParams.UPDATE_CHAIN, new String[] { chain });

View File

@ -0,0 +1,74 @@
package org.apache.solr.update.processor;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.google.common.collect.Maps;
import org.apache.noggit.ObjectBuilder;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.core.SolrCore;
import org.junit.BeforeClass;
import org.junit.Test;
import java.util.List;
import java.util.Map;
import static org.apache.solr.update.processor.SignatureUpdateProcessorFactoryTest.addDoc;
public class TestPartialUpdateDeduplication extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
initCore("solrconfig-tlog.xml", "schema15.xml");
}
@Test
public void testPartialUpdates() throws Exception {
SignatureUpdateProcessorFactoryTest.checkNumDocs(0);
String chain = "dedupe";
// partial update
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", "2a");
Map<String, Object> map = Maps.newHashMap();
map.put("set", "Hello Dude man!");
doc.addField("v_t", map);
UpdateRequest req = new UpdateRequest();
req.add(doc);
boolean exception_ok = false;
try {
addDoc(req.getXML(), chain);
} catch (Exception e) {
exception_ok = true;
}
assertTrue("Should have gotten an exception with partial update on signature generating field",
exception_ok);
SignatureUpdateProcessorFactoryTest.checkNumDocs(0);
addDoc(adoc("id", "2a", "v_t", "Hello Dude man!", "name", "ali babi'"), chain);
doc = new SolrInputDocument();
doc.addField("id", "2a");
map = Maps.newHashMap();
map.put("set", "name changed");
doc.addField("name", map);
req = new UpdateRequest();
req.add(doc);
addDoc(req.getXML(), chain);
addDoc(commit(), chain);
SignatureUpdateProcessorFactoryTest.checkNumDocs(1);
}
}

View File

@ -56,6 +56,7 @@ public class SimplePostToolTest extends SolrTestCaseJ4 {
t_web = SimplePostTool.parseArgsAndInit(args);
System.setProperty("params", "param1=foo&param2=bar");
System.setProperty("url", "http://localhost:5150/solr/update");
t_test = SimplePostTool.parseArgsAndInit(args);
pf = new MockPageFetcher();
@ -76,7 +77,7 @@ public class SimplePostToolTest extends SolrTestCaseJ4 {
assertEquals(1, t_web.recursive);
assertEquals(10, t_web.delay);
assertNotNull(t_test.solrUrl);
assertEquals("http://localhost:5150/solr/update?param1=foo&param2=bar",t_test.solrUrl.toExternalForm());
}
@Test

View File

@ -28,7 +28,7 @@
<jmx />
<lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" />
<lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
<!-- <indexConfig> section could go here, but we want the defaults -->

View File

@ -34,7 +34,7 @@
<lib dir="../../../../contrib/extraction/lib" />
<lib dir="../../../../contrib/dataimporthandler/lib/" regex=".*jar$" />
<lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" />
<lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
<!-- <indexConfig> section could go here, but we want the defaults -->

View File

@ -28,7 +28,7 @@
<jmx />
<lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" />
<lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
<!-- <indexConfig> section could go here, but we want the defaults -->

View File

@ -28,7 +28,7 @@
<jmx />
<lib dir="../../../../dist/" regex="apache-solr-dataimporthandler-.*\.jar" />
<lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
<!-- <indexConfig> section could go here, but we want the defaults -->

Some files were not shown because too many files have changed in this diff