LUCENE-3892: merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1374578 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2012-08-18 13:35:11 +00:00
commit bfcd96c689
169 changed files with 3911 additions and 720 deletions

View File

@ -145,21 +145,11 @@
<classpathentry kind="lib" path="solr/contrib/extraction/lib/bcmail-jdk15-1.45.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/bcprov-jdk15-1.45.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/boilerpipe-1.1.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/commons-compress-1.3.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/dom4j-1.6.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/fontbox-1.6.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/jempbox-1.6.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/metadata-extractor-2.4.0-beta-1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/netcdf-4.2-min.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/pdfbox-1.6.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-schemas-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-scratchpad-3.8-beta5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/rome-0.9.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tagsoup-1.2.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-1.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-1.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/xmlbeans-2.3.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/langid/lib/langdetect-1.1-20120112.jar"/>
<classpathentry kind="lib" path="solr/contrib/langid/lib/jsonic-1.2.7.jar"/>
@ -175,5 +165,25 @@
<classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-2.0.0.rc5.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/apache-mime4j-core-0.7.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/apache-mime4j-dom-0.7.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/commons-compress-1.4.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/fontbox-1.7.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/icu4j-49.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/isoparser-1.0-RC-1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/jdom-1.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/jempbox-1.7.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/juniversalchardet-1.0.3.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/pdfbox-1.7.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-ooxml-schemas-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/poi-scratchpad-3.8.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-core-1.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/tika-parsers-1.2.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/vorbis-java-core-0.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/vorbis-java-tika-0.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/xercesImpl-2.9.1.jar"/>
<classpathentry kind="lib" path="solr/contrib/extraction/lib/xz-1.0.jar"/>
<classpathentry kind="output" path="bin/other"/>
</classpath>

View File

@ -75,6 +75,14 @@ Bug Fixes
encoders / stemmers via the ResourceLoader now instead of Class.forName().
Solr users should no longer have to embed these in the war. (David Smiley)
* SOLR-3737: StempelPolishStemFilterFactory loaded its stemmer table incorrectly.
Also, ensure immutability and use only one instance of this table in RAM (lazy
loaded) since it's quite large. (sausarkar, Steven Rowe, Robert Muir)
* LUCENE-4310: MappingCharFilter was failing to match input strings
containing non-BMP Unicode characters. (Dawid Weiss, Robert Muir,
Mike McCandless)
Build
* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for

View File

@ -111,9 +111,8 @@ public class NormalizeCharMap {
final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<CharsRef>(FST.INPUT_TYPE.BYTE2, outputs);
final IntsRef scratch = new IntsRef();
for(Map.Entry<String,String> ent : pendingPairs.entrySet()) {
builder.add(Util.toUTF32(ent.getKey(), scratch),
builder.add(Util.toUTF16(ent.getKey(), scratch),
new CharsRef(ent.getValue()));
}
map = builder.finish();
pendingPairs.clear();
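This one-line change is the core of the LUCENE-4310 fix listed in CHANGES above: the FST is built with FST.INPUT_TYPE.BYTE2, i.e. keyed by UTF-16 code units, while Util.toUTF32 added keys as whole codepoints, so any mapping key containing a surrogate pair could never match the char-by-char lookup MappingCharFilter performs. A minimal sketch of the now-working path, assuming the 4.0 NormalizeCharMap.Builder API that the test below uses (U+1D122, the musical F clef, is a non-BMP character):

import java.io.StringReader;

import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;

public class NonBmpMappingSketch {
  public static void main(String[] args) throws Exception {
    // U+1D122 is one codepoint but two Java chars (a surrogate pair).
    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    builder.add("\uD834\uDD22", "fclef");
    NormalizeCharMap normMap = builder.build();

    // MappingCharFilter feeds the FST one UTF-16 unit at a time, so the
    // keys must be stored as UTF-16 units too (toUTF16, not toUTF32).
    CharFilter cs = new MappingCharFilter(normMap, new StringReader("\uD834\uDD22"));
    char[] buf = new char[16];
    int len = cs.read(buf, 0, buf.length);
    System.out.println(new String(buf, 0, len)); // "fclef" after this fix
  }
}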

View File

@ -33,6 +33,7 @@ import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
public class TestMappingCharFilter extends BaseTokenStreamTestCase {
@ -55,6 +56,11 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
builder.add( "empty", "" );
// non-BMP (surrogate pair):
builder.add(UnicodeUtil.newString(new int[] {0x1D122}, 0, 1), "fclef");
builder.add("\uff01", "full-width-exclamation");
normMap = builder.build();
}
@ -128,6 +134,18 @@ public class TestMappingCharFilter extends BaseTokenStreamTestCase {
assertTokenStreamContents(ts, new String[0], new int[]{}, new int[]{}, 5);
}
public void testNonBMPChar() throws Exception {
CharFilter cs = new MappingCharFilter( normMap, new StringReader( UnicodeUtil.newString(new int[] {0x1D122}, 0, 1) ) );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"fclef"}, new int[]{0}, new int[]{2}, 2);
}
public void testFullWidthChar() throws Exception {
CharFilter cs = new MappingCharFilter( normMap, new StringReader( "\uff01") );
TokenStream ts = new MockTokenizer(cs, MockTokenizer.WHITESPACE, false);
assertTokenStreamContents(ts, new String[]{"full-width-exclamation"}, new int[]{0}, new int[]{1}, 1);
}
//
// 1111111111222
// 01234567890123456789012

View File

@ -58,6 +58,13 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
return DefaultsHolder.DEFAULT_STOP_SET;
}
/**
* Returns an unmodifiable instance of the default stemmer table.
*/
public static Trie getDefaultTable() {
return DefaultsHolder.DEFAULT_TABLE;
}
/**
* Atomically loads the DEFAULT_STOP_SET in a lazy fashion once the outer class
* accesses the static final set the first time.
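getDefaultTable() is what makes the SOLR-3737 fix work: the large stemmer Trie is loaded lazily, once, and shared by every caller. A hedged sketch of direct use, mirroring what the rewritten StempelPolishStemFilterFactory in the next hunk now does:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer;

class PolishStemmingSketch {
  // Every caller shares the one lazily loaded table; no per-instance copy in RAM.
  TokenStream stem(TokenStream input) {
    return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
  }
}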

View File

@ -17,28 +17,17 @@ package org.apache.lucene.analysis.stempel;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.pl.PolishAnalyzer;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.egothor.stemmer.Trie;
/**
* Factory for {@link StempelFilter} using a Polish stemming table.
*/
public class StempelPolishStemFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
private Trie stemmer = null;
private static final String STEMTABLE = "/org/apache/lucene/analysis/pl/stemmer_20000.tbl";
public class StempelPolishStemFilterFactory extends TokenFilterFactory {
public TokenStream create(TokenStream input) {
return new StempelFilter(input, new StempelStemmer(stemmer));
}
public void inform(ResourceLoader loader) throws IOException {
stemmer = StempelStemmer.load(loader.openResource(STEMTABLE));
return new StempelFilter(input, new StempelStemmer(PolishAnalyzer.getDefaultTable()));
}
}

View File

@ -332,7 +332,7 @@ public class Trie {
* @param key the key
* @param cmd the patch command
*/
public void add(CharSequence key, CharSequence cmd) {
void add(CharSequence key, CharSequence cmd) {
if (key == null || cmd == null) {
return;
}

View File

@ -22,7 +22,6 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
/**
* Tests for {@link StempelPolishStemFilterFactory}
@ -31,7 +30,6 @@ public class TestStempelPolishStemFilterFactory extends BaseTokenStreamTestCase
public void testBasics() throws Exception {
StringReader document = new StringReader("studenta studenci");
StempelPolishStemFilterFactory factory = new StempelPolishStemFilterFactory();
factory.inform(new ClasspathResourceLoader(getClass()));
TokenStream ts = factory.create(new WhitespaceTokenizer(TEST_VERSION_CURRENT, document));
assertTokenStreamContents(ts,
new String[] { "student", "student" });

View File

@ -234,10 +234,10 @@
<check-missing-javadocs dir="build/docs/analyzers-stempel" level="class"/>
<check-missing-javadocs dir="build/docs/analyzers-uima" level="class"/>
<check-missing-javadocs dir="build/docs/benchmark" level="class"/>
<!-- core: problems -->
<check-missing-javadocs dir="build/docs/core" level="class"/>
<check-missing-javadocs dir="build/docs/demo" level="class"/>
<check-missing-javadocs dir="build/docs/facet" level="class"/>
<!-- grouping: problems -->
<check-missing-javadocs dir="build/docs/grouping" level="class"/>
<!-- highlighter: problems -->
<check-missing-javadocs dir="build/docs/join" level="class"/>
<check-missing-javadocs dir="build/docs/memory" level="class"/>
@ -247,7 +247,7 @@
<check-missing-javadocs dir="build/docs/sandbox" level="class"/>
<!-- spatial: problems -->
<check-missing-javadocs dir="build/docs/suggest" level="class"/>
<!-- test-framework: problems -->
<check-missing-javadocs dir="build/docs/test-framework" level="class"/>
</sequential>
</target>

View File

@ -26,11 +26,15 @@ import org.apache.lucene.index.TermState;
* terms dict.
*/
public class BlockTermState extends OrdTermState {
public int docFreq; // how many docs have this term
public long totalTermFreq; // total number of occurrences of this term
/** how many docs have this term */
public int docFreq;
/** total number of occurrences of this term */
public long totalTermFreq;
public int termBlockOrd; // the term's ord in the current block
public long blockFilePointer; // fp into the terms dict primary file (_X.tim) that holds this term
/** the term's ord in the current block */
public int termBlockOrd;
/** fp into the terms dict primary file (_X.tim) that holds this term */
public long blockFilePointer;
@Override
public void copyFrom(TermState _other) {

View File

@ -36,7 +36,7 @@ import org.apache.lucene.util.MathUtil;
*/
public abstract class MultiLevelSkipListReader {
// the maximum number of skip levels possible for this index
/** the maximum number of skip levels possible for this index */
protected int maxNumberOfSkipLevels;
// number of levels in this skip list

View File

@ -52,7 +52,7 @@ import org.apache.lucene.util.MathUtil;
*/
public abstract class MultiLevelSkipListWriter {
// number of levels in this skip list
/** number of levels in this skip list */
protected int numberOfSkipLevels;
// the skip interval in the list with level = 0
@ -93,8 +93,8 @@ public abstract class MultiLevelSkipListWriter {
}
}
/** creates new buffers or empties the existing ones */
protected void resetSkip() {
// creates new buffers or empties the existing ones
if (skipBuffer == null) {
init();
} else {

View File

@ -1796,7 +1796,7 @@ public class DirectPostingsFormat extends PostingsFormat {
}
// Docs + freqs:
public final static class HighFreqDocsEnum extends DocsEnum {
private final static class HighFreqDocsEnum extends DocsEnum {
private int[] docIDs;
private int[] freqs;
private final Bits liveDocs;
@ -1969,7 +1969,7 @@ public class DirectPostingsFormat extends PostingsFormat {
}
// TODO: specialize offsets and not
public final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
private final static class HighFreqDocsAndPositionsEnum extends DocsAndPositionsEnum {
private int[] docIDs;
private int[] freqs;
private int[][] positions;

View File

@ -36,7 +36,7 @@ public abstract class IntIndexInput implements Closeable {
public abstract Index index() throws IOException;
// TODO: -- can we simplify this?
/** Records a single skip-point in the {@link IntIndexInput.Reader}. */
public abstract static class Index {
public abstract void read(DataInput indexIn, boolean absolute) throws IOException;
@ -50,6 +50,7 @@ public abstract class IntIndexInput implements Closeable {
public abstract Index clone();
}
/** Reads int values. */
public abstract static class Reader {
/** Reads next single int */

View File

@ -38,6 +38,7 @@ public abstract class IntIndexOutput implements Closeable {
* >= 0. */
public abstract void write(int v) throws IOException;
/** Records a single skip-point in the IndexOutput. */
public abstract static class Index {
/** Internally records the current location */

View File

@ -22,8 +22,15 @@ import org.apache.lucene.store.IOContext;
import java.io.IOException;
/** @lucene.experimental */
/** Provides int reader and writer to specified files.
*
* @lucene.experimental */
public abstract class IntStreamFactory {
/** Create an {@link IntIndexInput} on the provided
* fileName. */
public abstract IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException;
/** Create an {@link IntIndexOutput} on the provided
* fileName. */
public abstract IntIndexOutput createOutput(Directory dir, String fileName, IOContext context) throws IOException;
}
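IntStreamFactory is the extension point the sep postings layer uses to obtain its int readers and writers; the mock factories later in this commit (MockSingleIntFactory, MockIntFactory) are concrete implementations. A hedged sketch of consuming a factory, with an illustrative file name; the reader()/next() accessors are assumed from this release's IntIndexInput API documented above:

import java.io.IOException;

import org.apache.lucene.codecs.sep.IntIndexInput;
import org.apache.lucene.codecs.sep.IntIndexOutput;
import org.apache.lucene.codecs.sep.IntStreamFactory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

class IntStreamRoundTrip {
  // Write one int and read it back through whatever encoding the factory picks.
  static int roundTrip(IntStreamFactory factory, Directory dir, IOContext ctx) throws IOException {
    IntIndexOutput out = factory.createOutput(dir, "_0.doc", ctx); // file name is illustrative
    out.write(42);
    out.close();

    IntIndexInput in = factory.openInput(dir, "_0.doc", ctx);
    int v = in.reader().next();
    in.close();
    return v; // 42
  }
}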

View File

@ -119,10 +119,13 @@ public class DocTermOrds {
protected final String field;
protected int numTermsInField;
protected long termInstances; // total number of references to term numbers
/** total number of references to term numbers */
protected long termInstances;
private long memsz;
protected int total_time; // total time to uninvert the field
protected int phase1_time; // time for phase1 of the uninvert process
/** total time to uninvert the field */
protected int total_time;
/** time for phase1 of the uninvert process */
protected int phase1_time;
protected int[] index;
protected byte[][] tnums = new byte[256][];
@ -234,7 +237,7 @@ public class DocTermOrds {
protected void setActualDocFreq(int termNum, int df) throws IOException {
}
// Call this only once (if you subclass!)
/** Call this only once (if you subclass!) */
protected void uninvert(final AtomicReader reader, final BytesRef termPrefix) throws IOException {
//System.out.println("DTO uninvert field=" + field + " prefix=" + termPrefix);
final long startTime = System.currentTimeMillis();

View File

@ -267,11 +267,11 @@ public class FieldInfos implements Iterable<FieldInfo> {
return addOrUpdateInternal(name, -1, isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType);
}
// NOTE: this method does not carry over termVector
// booleans nor docValuesType; the indexer chain
// (TermVectorsConsumerPerField, DocFieldProcessor) must
// set these fields when they succeed in consuming
// the document:
/** NOTE: this method does not carry over termVector
* booleans nor docValuesType; the indexer chain
* (TermVectorsConsumerPerField, DocFieldProcessor) must
* set these fields when they succeed in consuming
* the document */
public FieldInfo addOrUpdate(String name, IndexableFieldType fieldType) {
// TODO: really, indexer shouldn't even call this
// method (it's only called from DocFieldProcessor);

View File

@ -243,6 +243,10 @@ public abstract class MergePolicy implements java.io.Closeable, Cloneable {
}
}
/** Thrown when a merge was explicitly aborted because
* {@link IndexWriter#close(boolean)} was called with
* <code>false</code>. Normally this exception is
* privately caught and suppressed by {@link IndexWriter}. */
public static class MergeAbortedException extends IOException {
public MergeAbortedException() {
super("merge is aborted");

View File

@ -29,6 +29,9 @@ import org.apache.lucene.util.packed.PackedInts;
* @lucene.experimental */
public class MergeState {
/**
* Remaps docids around deletes during merge
*/
public static abstract class DocMap {
private final Bits liveDocs;
@ -197,6 +200,9 @@ public class MergeState {
public SegmentReader[] matchingSegmentReaders;
public int matchedCount;
/**
* Class for recording units of work when merging segments.
*/
public static class CheckAbort {
private double workCount;
private final MergePolicy.OneMerge merge;

View File

@ -43,7 +43,7 @@ import org.apache.lucene.util.packed.PackedInts.Reader;
* @lucene.experimental
* @lucene.internal
*/
public class MultiDocValues extends DocValues {
class MultiDocValues extends DocValues {
private static DocValuesPuller DEFAULT_PULLER = new DocValuesPuller();
private static final DocValuesPuller NORMS_PULLER = new DocValuesPuller() {

View File

@ -143,6 +143,8 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
// TODO: implement bulk read more efficiently than super
/** Holds a {@link DocsAndPositionsEnum} along with the
* corresponding {@link ReaderSlice}. */
public final static class EnumWithSlice {
public DocsAndPositionsEnum docsAndPositionsEnum;
public ReaderSlice slice;

View File

@ -122,6 +122,8 @@ public final class MultiDocsEnum extends DocsEnum {
}
// TODO: implement bulk read more efficiently than super
/** Holds a {@link DocsEnum} along with the
* corresponding {@link ReaderSlice}. */
public final static class EnumWithSlice {
public DocsEnum docsEnum;
public ReaderSlice slice;

View File

@ -133,7 +133,8 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCom
// or wrote; this is normally the same as generation except if
// there was an IOException that had interrupted a commit
public Map<String,String> userData = Collections.<String,String>emptyMap(); // Opaque Map<String, String> that user can specify during IndexWriter.commit
/** Opaque Map&lt;String, String&gt; that user can specify during IndexWriter.commit */
public Map<String,String> userData = Collections.<String,String>emptyMap();
private List<SegmentInfoPerCommit> segments = new ArrayList<SegmentInfoPerCommit>();

View File

@ -30,11 +30,11 @@ public class SegmentReadState {
public final FieldInfos fieldInfos;
public final IOContext context;
// NOTE: if this is < 0, that means "defer terms index
// load until needed". But if the codec must load the
// terms index on init (preflex is the only once currently
// that must do so), then it should negate this value to
// get the app's terms divisor:
/** NOTE: if this is &lt; 0, that means "defer terms index
* load until needed". But if the codec must load the
* terms index on init (preflex is the only one currently
* that must do so), then it should negate this value to
* get the app's terms divisor */
public int termsIndexDivisor;
public final String segmentSuffix;

View File

@ -33,11 +33,11 @@ public class SegmentWriteState {
public final FieldInfos fieldInfos;
public int delCountOnFlush;
// Deletes to apply while we are flushing the segment. A
// Term is enrolled in here if it was deleted at one
// point, and it's mapped to the docIDUpto, meaning any
// docID < docIDUpto containing this term should be
// deleted.
/** Deletes to apply while we are flushing the segment. A
* Term is enrolled in here if it was deleted at one
* point, and it's mapped to the docIDUpto, meaning any
* docID &lt; docIDUpto containing this term should be
* deleted. */
public final BufferedDeletes segDeletes;
// Lazily created:

View File

@ -32,6 +32,9 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.packed.PackedInts;
/**
* Utility class for merging SortedBytes DocValues
* instances.
*
* @lucene.internal
*/
public final class SortedBytesMergeUtils {
@ -54,7 +57,14 @@ public final class SortedBytesMergeUtils {
}
return new MergeContext(comp, mergeDocCount, size, type);
}
/**
* Encapsulates contextual information about the merge.
* This class holds document id to ordinal mappings, offsets for
* variable length values and the comparator to sort the merged
* bytes.
*
* @lucene.internal
*/
public static final class MergeContext {
private final Comparator<BytesRef> comp;
private final BytesRef missingValue = new BytesRef();
@ -169,10 +179,36 @@ public final class SortedBytesMergeUtils {
return merger.currentOrd;
}
/**
* Implementations of this interface consume the merged bytes with their
* corresponding ordinal and byte offset. The offset is the byte offset in
* the target sorted source where the currently merged {@link BytesRef}
* instance should be stored.
*/
public static interface BytesRefConsumer {
/**
* Consumes a single {@link BytesRef}. The provided {@link BytesRef}
* instances are strictly increasing with respect to the
* {@link Comparator} used for merging.
*
* @param ref
* the {@link BytesRef} to consume
* @param ord
* the ordinal of the given {@link BytesRef} in the merge target
* @param offset
* the byte offset of the given {@link BytesRef} in the merge
* target
* @throws IOException
* if an {@link IOException} occurs
*/
public void consume(BytesRef ref, int ord, long offset) throws IOException;
}
/**
* A simple {@link BytesRefConsumer} that writes the merged {@link BytesRef}
* instances sequentially to an {@link IndexOutput}.
*/
public static final class IndexOutputBytesRefConsumer implements BytesRefConsumer {
private final IndexOutput datOut;
@ -187,6 +223,14 @@ public final class SortedBytesMergeUtils {
}
}
/**
* {@link RecordMerger} merges a list of {@link SortedSourceSlice} lazily by
* consuming the sorted source records one by one and de-duplicates records
* that are shared across slices. The algorithm is based on a lazy priority queue
* that prevents reading merge sources into heap memory.
*
* @lucene.internal
*/
private static final class RecordMerger {
private final MergeQueue queue;
private final SortedSourceSlice[] top;
@ -231,6 +275,12 @@ public final class SortedBytesMergeUtils {
}
}
/**
* {@link SortedSourceSlice} represents a single {@link SortedSource} merge candidate.
* It encapsulates ordinal and pre-calculated target doc id to ordinal mappings.
* This class also holds state private to the merge process.
* @lucene.internal
*/
public static class SortedSourceSlice {
final SortedSource source;
final int readerIdx;
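The new BytesRefConsumer interface decouples the merge loop from where the merged, deduplicated values end up; IndexOutputBytesRefConsumer above is the stock implementation. A hedged sketch of a custom consumer that just collects the values (the package of SortedBytesMergeUtils is assumed from this commit's tree):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.index.SortedBytesMergeUtils.BytesRefConsumer;
import org.apache.lucene.util.BytesRef;

class CollectingConsumer implements BytesRefConsumer {
  // Values arrive in strictly increasing order per the merge comparator;
  // 'offset' is where each value would start in the merged sorted source.
  final List<BytesRef> values = new ArrayList<BytesRef>();

  @Override
  public void consume(BytesRef ref, int ord, long offset) throws IOException {
    values.add(BytesRef.deepCopyOf(ref)); // the merger may reuse 'ref', so copy it
  }
}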

View File

@ -45,6 +45,9 @@ import org.apache.lucene.util.packed.PackedInts;
*/
public interface FieldCache {
/**
* Placeholder indicating creation of this cache is currently in-progress.
*/
public static final class CreationPlaceholder {
Object value;
}

View File

@ -194,6 +194,9 @@ public abstract class FieldComparator<T> {
* than the provided value. */
public abstract int compareDocToValue(int doc, T value) throws IOException;
/**
* Base FieldComparator class for numeric types
*/
public static abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
protected final T missingValue;
protected final String field;

View File

@ -33,6 +33,10 @@ import org.apache.lucene.util.PriorityQueue;
*/
public abstract class FieldValueHitQueue<T extends FieldValueHitQueue.Entry> extends PriorityQueue<T> {
/**
* Extension of ScoreDoc to also store the
* {@link FieldComparator} slot.
*/
public static class Entry extends ScoreDoc {
public int slot;

View File

@ -398,12 +398,17 @@ public class FuzzyTermsEnum extends TermsEnum {
return scale_factor;
}
/** @lucene.internal */
/**
* reuses compiled automata across different segments,
* because they are independent of the index
* @lucene.internal */
public static interface LevenshteinAutomataAttribute extends Attribute {
public List<CompiledAutomaton> automata();
}
/** @lucene.internal */
/**
* Stores compiled automata as a list (indexed by edit distance)
* @lucene.internal */
public static final class LevenshteinAutomataAttributeImpl extends AttributeImpl implements LevenshteinAutomataAttribute {
private final List<CompiledAutomaton> automata = new ArrayList<CompiledAutomaton>();

View File

@ -82,7 +82,7 @@ public class IndexSearcher {
// in the next release
protected final IndexReaderContext readerContext;
protected final List<AtomicReaderContext> leafContexts;
// used with executor - each slice holds a set of leafs executed within one thread
/** used with executor - each slice holds a set of leafs executed within one thread */
protected final LeafSlice[] leafSlices;
// These are only used for multi-threaded search

View File

@ -32,7 +32,11 @@ import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
/** @lucene.internal Only public to be accessible by spans package. */
/**
* Base rewrite method that translates each term into a query, and keeps
* the scores as computed by the query.
* <p>
* @lucene.internal Only public to be accessible by spans package. */
public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewrite<Q> {
/** A rewrite method that first translates each term into

View File

@ -38,6 +38,9 @@ import org.apache.lucene.util.StringHelper;
*/
public class SortField {
/**
* Specifies the type of the terms to be sorted, or special types such as CUSTOM
*/
public static enum Type {
/** Sort by document score (relevance). Sort values are Float and higher

View File

@ -225,6 +225,8 @@ public class TimeLimitingCollector extends Collector {
}
/**
* Thread used to timeout search requests.
* Can be stopped completely with {@link TimerThread#stopTimer()}
* @lucene.experimental
*/
public static final class TimerThread extends Thread {

View File

@ -33,8 +33,8 @@ import org.apache.lucene.util.PriorityQueue;
*/
public abstract class TopDocsCollector<T extends ScoreDoc> extends Collector {
// This is used in case topDocs() is called with illegal parameters, or there
// simply aren't (enough) results.
/** This is used in case topDocs() is called with illegal parameters, or there
* simply aren't (enough) results. */
protected static final TopDocs EMPTY_TOPDOCS = new TopDocs(0, new ScoreDoc[0], Float.NaN);
/**

View File

@ -436,6 +436,9 @@ public abstract class FSDirectory extends Directory {
return chunkSize;
}
/**
* Writes output with {@link RandomAccessFile#write(byte[], int, int)}
*/
protected static class FSIndexOutput extends BufferedIndexOutput {
private final FSDirectory parent;
private final String name;

View File

@ -106,6 +106,9 @@ public class NIOFSDirectory extends FSDirectory {
};
}
/**
* Reads bytes with {@link FileChannel#read(ByteBuffer, long)}
*/
protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {
private ByteBuffer byteBuf; // wraps the buffer for NIO

View File

@ -19,7 +19,9 @@ package org.apache.lucene.store;
import java.util.ArrayList;
/** @lucene.internal */
/**
* Represents a file in RAM as a list of byte[] buffers.
* @lucene.internal */
public class RAMFile {
protected ArrayList<byte[]> buffers = new ArrayList<byte[]>();
long length;

View File

@ -85,8 +85,16 @@ public class SimpleFSDirectory extends FSDirectory {
};
}
/**
* Reads bytes with {@link RandomAccessFile#seek(long)} followed by
* {@link RandomAccessFile#read(byte[], int, int)}.
*/
protected static class SimpleFSIndexInput extends BufferedIndexInput {
/**
* Extension of RandomAccessFile that tracks if the file is
* open.
*/
protected static class Descriptor extends RandomAccessFile {
// remember if the file is open, so that we don't try to close it
// more than once

View File

@ -117,10 +117,13 @@ public final class ByteBlockPool {
public byte[][] buffers = new byte[10][];
int bufferUpto = -1; // Which buffer we are upto
public int byteUpto = BYTE_BLOCK_SIZE; // Where we are in head buffer
/** Where we are in head buffer */
public int byteUpto = BYTE_BLOCK_SIZE;
public byte[] buffer; // Current head buffer
public int byteOffset = -BYTE_BLOCK_SIZE; // Current head offset
/** Current head buffer */
public byte[] buffer;
/** Current head offset */
public int byteOffset = -BYTE_BLOCK_SIZE;
private final Allocator allocator;

View File

@ -48,6 +48,11 @@ public class FuzzySet {
public static final int FUZZY_SERIALIZATION_VERSION=1;
/**
* Result from {@link FuzzySet#contains(BytesRef)}:
* can never return definitively YES (always MAYBE),
* but can sometimes definitely return NO.
*/
public enum ContainsResult {
MAYBE, NO
};
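ContainsResult spells out the Bloom-filter contract: the set can prove a value was never added (NO) but can only suggest presence (MAYBE). A hedged sketch of the short-circuit a consumer would perform before an expensive terms-dictionary lookup; FuzzySet's package and the exact contains() signature are assumed from the bloom codec module this hunk belongs to:

import java.io.IOException;

import org.apache.lucene.codecs.bloom.FuzzySet;
import org.apache.lucene.util.BytesRef;

class BloomGate {
  // Returns false only when the term is definitely absent from the index.
  static boolean maybePresent(FuzzySet bloom, BytesRef term) throws IOException {
    return bloom.contains(term) != FuzzySet.ContainsResult.NO;
  }
}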

View File

@ -158,7 +158,7 @@ public final class FST<T> {
private final boolean packed;
private PackedInts.Reader nodeRefToAddress;
// If arc has this label then that arc is final/accepted
/** If arc has this label then that arc is final/accepted */
public static final int END_LABEL = -1;
private boolean allowArrayArcs = true;
@ -174,7 +174,7 @@ public final class FST<T> {
// building an FST w/ willPackFST=true:
int node;
// To node (ord or address):
/** To node (ord or address) */
public int target;
byte flags;
@ -542,8 +542,8 @@ public final class FST<T> {
return v;
}
// returns true if the node at this address has any
// outgoing arcs
/** returns true if the node at this address has any
* outgoing arcs */
public static<T> boolean targetHasArcs(Arc<T> arc) {
return arc.target > 0;
}

View File

@ -767,6 +767,19 @@ public final class Util {
}
}
/** Just maps each UTF16 unit (char) to the ints in an
* IntsRef. */
public static IntsRef toUTF16(CharSequence s, IntsRef scratch) {
final int charLimit = s.length();
scratch.offset = 0;
scratch.length = charLimit;
scratch.grow(charLimit);
for (int idx = 0; idx < charLimit; idx++) {
scratch.ints[idx] = (int) s.charAt(idx);
}
return scratch;
}
/** Decodes the Unicode codepoints from the provided
* CharSequence and places them in the provided scratch
* IntsRef, which must not be null, returning it. */
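The new toUTF16 above complements the existing toUTF32, whose javadoc closes this hunk, and is what the NormalizeCharMap fix earlier in this commit switches to. The two only differ for non-BMP input: toUTF32 collapses a surrogate pair into one codepoint, while toUTF16 keeps both code units. A small sketch:

import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.Util;

class Utf16VsUtf32 {
  static void demo() {
    String fclef = "\uD834\uDD22"; // U+1D122 as a surrogate pair

    IntsRef utf32 = Util.toUTF32(fclef, new IntsRef()); // length == 1: { 0x1D122 }
    IntsRef utf16 = Util.toUTF16(fclef, new IntsRef()); // length == 2: { 0xD834, 0xDD22 }
  }
}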

View File

@ -734,7 +734,7 @@ public class PackedInts {
}
return new Packed64(in, valueCount, bitsPerValue);
default:
throw new AssertionError("Unknwown Writer format: " + format);
throw new AssertionError("Unknown Writer format: " + format);
}
}

View File

@ -20,12 +20,13 @@ package org.apache.lucene.search;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.Norm;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
@ -36,7 +37,7 @@ import org.apache.lucene.util.LuceneTestCase;
public class TestSimilarityProvider extends LuceneTestCase {
private Directory directory;
private IndexReader reader;
private DirectoryReader reader;
private IndexSearcher searcher;
@Override
@ -75,8 +76,9 @@ public class TestSimilarityProvider extends LuceneTestCase {
public void testBasics() throws Exception {
// sanity check of norms writer
// TODO: generalize
byte fooNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "foo").getSource().getArray();
byte barNorms[] = (byte[]) MultiDocValues.getNormDocValues(reader, "bar").getSource().getArray();
AtomicReader slow = new SlowCompositeReaderWrapper(reader);
byte fooNorms[] = (byte[]) slow.normValues("foo").getSource().getArray();
byte barNorms[] = (byte[]) slow.normValues("bar").getSource().getArray();
for (int i = 0; i < fooNorms.length; i++) {
assertFalse(fooNorms[i] == barNorms[i]);
}
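The rewrite above is needed because this commit makes MultiDocValues package-private (see the earlier MultiDocValues.java hunk); tests now obtain a composite view of per-segment norms through SlowCompositeReaderWrapper instead. The pattern, as a hedged helper:

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.SlowCompositeReaderWrapper;

class NormsHelper {
  // Merges all segments behind one AtomicReader view. Fine for tests;
  // as the class name warns, too slow for production code.
  static byte[] norms(DirectoryReader reader, String field) throws IOException {
    AtomicReader slow = new SlowCompositeReaderWrapper(reader);
    return (byte[]) slow.normValues(field).getSource().getArray();
  }
}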

View File

@ -0,0 +1,49 @@
package org.apache.lucene.util.junitcompat;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import org.apache.lucene.util._TestUtil;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.JUnitCore;
import org.junit.runner.Result;
public class TestLeaveFilesIfTestFails extends WithNestedTests {
public TestLeaveFilesIfTestFails() {
super(true);
}
public static class Nested1 extends WithNestedTests.AbstractNestedTest {
static File file;
public void testDummy() {
file = _TestUtil.getTempDir("leftover");
file.mkdirs();
fail();
}
}
@Test
public void testLeaveFilesIfTestFails() {
Result r = JUnitCore.runClasses(Nested1.class);
Assert.assertEquals(1, r.getFailureCount());
Assert.assertTrue(Nested1.file.exists());
Nested1.file.delete();
}
}

View File

@ -44,6 +44,10 @@ public abstract class AbstractDistinctValuesCollector<GC extends AbstractDistinc
public void setScorer(Scorer scorer) throws IOException {
}
/**
* Returned by {@link AbstractDistinctValuesCollector#getGroups()},
* representing the value and set of distinct values for the group.
*/
public abstract static class GroupCount<GROUP_VALUE_TYPE> {
public final GROUP_VALUE_TYPE groupValue;

View File

@ -17,7 +17,12 @@
package org.apache.lucene.search.grouping;
/** @lucene.internal */
import org.apache.lucene.search.FieldComparator; // javadocs
/**
* Expert: representation of a group in {@link AbstractFirstPassGroupingCollector},
* tracking the top doc and {@link FieldComparator} slot.
* @lucene.internal */
public class CollectedSearchGroup<T> extends SearchGroup<T> {
int topDoc;
int comparatorSlot;

View File

@ -90,6 +90,28 @@
</sequential>
</macrodef>
<property name="test-framework.jar" value="${common.dir}/build/test-framework/lucene-test-framework-${version}.jar"/>
<target name="check-test-framework-uptodate" unless="test-framework.uptodate">
<module-uptodate name="test-framework" jarfile="${test-framework.jar}" property="test-framework.uptodate"/>
</target>
<target name="jar-test-framework" unless="test-framework.uptodate" depends="check-test-framework-uptodate">
<ant dir="${common.dir}/test-framework" target="jar-core" inheritall="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="test-framework.uptodate" value="true"/>
</target>
<property name="test-framework-javadoc.jar" value="${common.dir}/build/test-framework/lucene-test-framework-${version}-javadoc.jar"/>
<target name="check-test-framework-javadocs-uptodate" unless="test-framework-javadocs.uptodate">
<module-uptodate name="test-framework" jarfile="${test-framework-javadoc.jar}" property="test-framework-javadocs.uptodate"/>
</target>
<target name="javadocs-test-framework" unless="test-framework-javadocs.uptodate" depends="check-test-framework-javadocs-uptodate">
<ant dir="${common.dir}/test-framework" target="javadocs" inheritAll="false">
<propertyset refid="uptodate.and.compiled.properties"/>
</ant>
<property name="test-framework-javadocs.uptodate" value="true"/>
</target>
<property name="queryparser.jar" value="${common.dir}/build/queryparser/lucene-queryparser-${version}.jar"/>
<target name="check-queryparser-uptodate" unless="queryparser.uptodate">
<module-uptodate name="queryparser" jarfile="${queryparser.jar}" property="queryparser.uptodate"/>

View File

@ -24,6 +24,9 @@ import org.apache.lucene.queries.function.docvalues.DoubleDocValues;
import java.io.IOException;
import java.util.Map;
/**
* Function that returns a constant double value for every document.
*/
public class DoubleConstValueSource extends ConstNumberSource {
final double constant;
private final float fv;

View File

@ -28,7 +28,13 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Map;
/** @lucene.internal */
/**
* Function that returns {@link TFIDFSimilarity#idf(long, long)}
* for every document.
* <p>
* Note that the configured Similarity for the field must be
* a subclass of {@link TFIDFSimilarity}
* @lucene.internal */
public class IDFValueSource extends DocFreqValueSource {
public IDFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
super(field, val, indexedField, indexedBytes);

View File

@ -30,6 +30,10 @@ import java.util.List;
import java.util.Map;
/**
* Depending on the boolean value of the <code>ifSource</code> function,
* returns the value of the <code>trueSource</code> or <code>falseSource</code> function.
*/
public class IfFunction extends BoolFunction {
private final ValueSource ifSource;
private final ValueSource trueSource;

View File

@ -17,6 +17,7 @@
package org.apache.lucene.queries.function.valuesource;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader; // javadocs
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.IndexSearcher;
@ -24,6 +25,11 @@ import org.apache.lucene.search.IndexSearcher;
import java.io.IOException;
import java.util.Map;
/**
* Returns the value of {@link IndexReader#maxDoc()}
* for every document. This is the number of documents
* including deletions.
*/
public class MaxDocValueSource extends ValueSource {
public String name() {
return "maxdoc";

View File

@ -28,6 +28,13 @@ import org.apache.lucene.search.similarities.TFIDFSimilarity;
import java.io.IOException;
import java.util.Map;
/**
* Function that returns {@link TFIDFSimilarity#decodeNormValue(byte)}
* for every document.
* <p>
* Note that the configured Similarity for the field must be
* a subclass of {@link TFIDFSimilarity}
* @lucene.internal */
public class NormValueSource extends ValueSource {
protected final String field;
public NormValueSource(String field) {

View File

@ -30,7 +30,10 @@ import java.io.IOException;
import java.util.Map;
/**
* <code>TotalTermFreqValueSource</code> returns the total term freq (sum of term freqs across all docuyments).
* <code>SumTotalTermFreqValueSource</code> returns the number of tokens
* (sum of term freqs across all documents, across all terms).
* Returns -1 if frequencies were omitted for the field, or if
* the codec doesn't support this statistic.
* @lucene.internal
*/
public class SumTotalTermFreqValueSource extends ValueSource {

View File

@ -28,6 +28,13 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Map;
/**
* Function that returns {@link TFIDFSimilarity#tf(int)}
* for every document.
* <p>
* Note that the configured Similarity for the field must be
* a subclass of {@link TFIDFSimilarity}
* @lucene.internal */
public class TFValueSource extends TermFreqValueSource {
public TFValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
super(field, val, indexedField, indexedBytes);

View File

@ -26,6 +26,13 @@ import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Map;
/**
* Function that returns {@link DocsEnum#freq()} for the
* supplied term in every document.
* <p>
* If the term does not exist in the document, returns 0.
* If frequencies are omitted, returns 1.
*/
public class TermFreqValueSource extends DocFreqValueSource {
public TermFreqValueSource(String field, String val, String indexedField, BytesRef indexedBytes) {
super(field, val, indexedField, indexedBytes);
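The added javadoc pins down the edge cases: 0 when the term is missing from a document, 1 when the field omitted frequencies. A hedged sketch of evaluating the function against a single segment, using the constructor shown above and the 4.0-era ValueSource.getValues(Map, AtomicReaderContext) plumbing; the field and term names are illustrative:

import java.io.IOException;
import java.util.HashMap;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.TermFreqValueSource;
import org.apache.lucene.util.BytesRef;

class TermFreqSketch {
  // Per-document frequency of the term "lucene" in field "body".
  static int termFreq(AtomicReaderContext leaf, int docId) throws IOException {
    ValueSource vs = new TermFreqValueSource("body", "lucene", "body", new BytesRef("lucene"));
    FunctionValues values = vs.getValues(new HashMap<Object, Object>(), leaf);
    return values.intVal(docId); // 0 if the term is absent; 1 if freqs were omitted
  }
}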

View File

@ -28,7 +28,10 @@ import java.io.IOException;
import java.util.Map;
/**
* <code>TotalTermFreqValueSource</code> returns the total term freq (sum of term freqs across all docuyments).
* <code>TotalTermFreqValueSource</code> returns the total term freq
* (sum of term freqs across all documents).
* Returns -1 if frequencies were omitted for the field, or if
* the codec doesn't support this statistic.
* @lucene.internal
*/
public class TotalTermFreqValueSource extends ValueSource {

View File

@ -29,7 +29,9 @@ import org.apache.lucene.util._TestUtil;
// a MockRemovesTokensTF, ideally subclassing FilteringTF
// (in modules/analysis)
// Randomly injects holes:
/**
* Randomly injects holes (similar to what a stopfilter would do)
*/
public final class MockHoleInjectingTokenFilter extends TokenFilter {
private final long randomSeed;

View File

@ -27,8 +27,9 @@ import java.io.Reader;
/**
*
*
* Wraps a whitespace tokenizer with a filter that sets
* the first token and odd tokens to posinc=1, and all others
* to 0, encoding the position as "pos: XXX" in the payload.
**/
public final class MockPayloadAnalyzer extends Analyzer {

View File

@ -30,6 +30,7 @@ import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.TermsIndexReaderBase;
import org.apache.lucene.codecs.TermsIndexWriterBase;
import org.apache.lucene.codecs.lucene40.Lucene40Codec; // javadocs
import org.apache.lucene.codecs.lucene40.Lucene40PostingsReader;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsWriter;
import org.apache.lucene.index.SegmentReadState;
@ -39,6 +40,10 @@ import org.apache.lucene.util.BytesRef;
// TODO: we could make a separate base class that can wrap
// any PostingsBaseFormat and make it ord-able...
/**
* Customized version of {@link Lucene40Codec} that uses
* {@link FixedGapTermsIndexWriter}.
*/
public class Lucene40WithOrds extends PostingsFormat {
public Lucene40WithOrds() {

View File

@ -72,6 +72,9 @@ public class MockFixedIntBlockPostingsFormat extends PostingsFormat {
return new MockIntFactory(blockSize);
}
/**
* Encodes blocks as vInts of a fixed block size.
*/
public static class MockIntFactory extends IntStreamFactory {
private final int blockSize;

View File

@ -70,6 +70,10 @@ public class MockVariableIntBlockPostingsFormat extends PostingsFormat {
return getName() + "(baseBlockSize="+ baseBlockSize + ")";
}
/**
* If the first value is <= 3, writes baseBlockSize vInts at once,
* otherwise writes 2*baseBlockSize vInts.
*/
public static class MockIntFactory extends IntStreamFactory {
private final int baseBlockSize;

View File

@ -25,7 +25,10 @@ import org.apache.lucene.codecs.sep.IntStreamFactory;
import java.io.IOException;
/** @lucene.experimental */
/**
* Encodes ints directly as vInts with {@link MockSingleIntIndexOutput}
* @lucene.experimental
*/
public class MockSingleIntFactory extends IntStreamFactory {
@Override
public IntIndexInput openInput(Directory dir, String fileName, IOContext context) throws IOException {

View File

@ -28,7 +28,7 @@ import org.apache.lucene.store.IndexInput;
/** Reads IndexInputs written with {@link
* MockSingleIntIndexOutput}. NOTE: this class is just for
* demonstration puprposes (it is a very slow way to read a
* demonstration purposes (it is a very slow way to read a
* block of ints).
*
* @lucene.experimental
@ -54,6 +54,9 @@ public class MockSingleIntIndexInput extends IntIndexInput {
in.close();
}
/**
* Just reads a vInt directly from the file.
*/
public static class Reader extends IntIndexInput.Reader {
// clone:
private final IndexInput in;

View File

@ -68,7 +68,7 @@ public class AlcoholicMergePolicy extends LogMergePolicy {
return info.sizeInBytes();
}
public static enum Drink {
private static enum Drink {
Beer(15), Wine(17), Champagne(21), WhiteRussian(22), SingleMalt(30);
@ -77,11 +77,6 @@ public class AlcoholicMergePolicy extends LogMergePolicy {
Drink(long drunkFactor) {
this.drunkFactor = drunkFactor;
}
public long drunk() {
return drunkFactor;
}
}
}

View File

@ -23,6 +23,10 @@ import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;
/**
* A {@link FilterAtomicReader} that exposes only a subset
* of fields from the underlying wrapped reader.
*/
public final class FieldFilterAtomicReader extends FilterAtomicReader {
private final Set<String> fields;

View File

@ -42,9 +42,15 @@ import org.apache.lucene.util._TestUtil;
// - doc blocks? so we can test joins/grouping...
// - controlled consistency (NRTMgr)
/**
* Base test class for simulating distributed search across multiple shards.
*/
public abstract class ShardSearchingTestBase extends LuceneTestCase {
// TODO: maybe SLM should throw this instead of returning null...
/**
* Thrown when the lease for a searcher has expired.
*/
public static class SearcherExpiredException extends RuntimeException {
public SearcherExpiredException(String message) {
super(message);
@ -604,6 +610,9 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
}
}
/**
* An IndexSearcher and associated version (lease)
*/
protected static class SearcherAndVersion {
public final IndexSearcher searcher;
public final long version;

View File

@ -146,6 +146,12 @@ public class MockDirectoryWrapper extends BaseDirectoryWrapper {
preventDoubleWrite = value;
}
/**
* Enum for controlling hard disk throttling.
* Set via {@link MockDirectoryWrapper#setThrottling(Throttling)}
* <p>
* WARNING: can make tests very slow.
*/
public static enum Throttling {
/** always emulate a slow hard disk. could be very slow! */
ALWAYS,

View File

@ -24,13 +24,17 @@ import java.io.*;
*/
final class CloseableFile implements Closeable {
private final File file;
private final TestRuleMarkFailure failureMarker;
public CloseableFile(File file) {
public CloseableFile(File file, TestRuleMarkFailure failureMarker) {
this.file = file;
this.failureMarker = failureMarker;
}
@Override
public void close() throws IOException {
// only if there were no other test failures.
if (failureMarker.wasSuccessful()) {
if (file.exists()) {
try {
_TestUtil.rmDir(file);
@ -45,4 +49,5 @@ final class CloseableFile implements Closeable {
}
}
}
}
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.util;
*/
/**
* Converts numbers to english strings for testing.
* @lucene.internal
*/
public final class English {

View File

@ -26,6 +26,9 @@ package org.apache.lucene.util;
* @lucene.internal */
public abstract class RollingBuffer<T extends RollingBuffer.Resettable> {
/**
* Implement to reset an instance
*/
public static interface Resettable {
public void reset();
}

View File

@ -1,10 +1,5 @@
package org.apache.lucene.util;
import org.apache.lucene.search.FieldCache;
import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -22,6 +17,30 @@ import org.junit.runners.model.Statement;
* limitations under the License.
*/
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.FieldCacheSanityChecker; // javadocs
import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
/**
* This rule will fail the test if it has insane field caches.
* <p>
* calling assertSaneFieldCaches here isn't as useful as having test
* classes call it directly from the scope where the index readers
* are used, because they could be gc'ed just before this tearDown
* method is called.
* <p>
* But it's better than nothing.
* <p>
* If you are testing functionality that you know for a fact
* "violates" FieldCache sanity, then you should either explicitly
* call purgeFieldCache at the end of your test method, or refactor
* your Test class so that the inconsistent FieldCache usages are
* isolated in distinct test methods
*
* @see FieldCacheSanityChecker
*/
public class TestRuleFieldCacheSanity implements TestRule {
@Override
@ -33,18 +52,6 @@ public class TestRuleFieldCacheSanity implements TestRule {
Throwable problem = null;
try {
// calling assertSaneFieldCaches here isn't as useful as having test
// classes call it directly from the scope where the index readers
// are used, because they could be gc'ed just before this tearDown
// method is called.
//
// But it's better then nothing.
//
// If you are testing functionality that you know for a fact
// "violates" FieldCache sanity, then you should either explicitly
// call purgeFieldCache at the end of your test method, or refactor
// your Test class so that the inconsistent FieldCache usages are
// isolated in distinct test methods
LuceneTestCase.assertSaneFieldCaches(d.getDisplayName());
} catch (Throwable t) {
problem = t;

View File

@ -21,6 +21,10 @@ import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
/**
* Stores the suite name so you can retrieve it
* from {@link #getTestClass()}
*/
public class TestRuleStoreClassName implements TestRule {
private volatile Description description;

View File

@ -94,7 +94,7 @@ public class _TestUtil {
try {
File f = createTempFile(desc, "tmp", LuceneTestCase.TEMP_DIR);
f.delete();
LuceneTestCase.closeAfterSuite(new CloseableFile(f));
LuceneTestCase.closeAfterSuite(new CloseableFile(f, LuceneTestCase.suiteFailureMarker));
return f;
} catch (IOException e) {
throw new RuntimeException(e);
@ -136,7 +136,7 @@ public class _TestUtil {
rmDir(destDir);
destDir.mkdir();
LuceneTestCase.closeAfterSuite(new CloseableFile(destDir));
LuceneTestCase.closeAfterSuite(new CloseableFile(destDir, LuceneTestCase.suiteFailureMarker));
while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();

View File

@ -127,6 +127,13 @@ public class AutomatonTestUtil {
return code;
}
/**
* Lets you retrieve random strings accepted
* by an Automaton.
* <p>
* Once created, call {@link #getRandomAcceptedString(Random)}
* to get a new string (in UTF-32 codepoints).
*/
public static class RandomAcceptedStrings {
private final Map<Transition,Boolean> leadsToAccept;

View File

@ -26,6 +26,14 @@ $Id$
================== 4.0.0 ==================
Versions of Major Components
---------------------
Apache Tika 1.2
Carrot2 3.5.0
Velocity 1.6.4 and Velocity Tools 2.0
Apache UIMA 2.3.1
Apache ZooKeeper 3.3.6
Upgrading from Solr 4.0.0-BETA
----------------------
@ -63,6 +71,27 @@ Bug Fixes
* SOLR-3649: Fixed bug in JavabinLoader that caused deleteById(List<String> ids)
to not work in SolrJ (siren)
* SOLR-3730: Rollback is not implemented quite right and can cause corner case fails in
SolrCloud tests. (rmuir, Mark Miller)
* SOLR-2981: Fixed StatsComponent to no longer return duplicated information
when requesting multiple stats.facet fields.
(Roman Kliewer via hossman)
Other Changes
----------------------
* SOLR-3690: Fixed binary release packages to include dependencies needed for
the solr-test-framework (hossman)
* SOLR-2857: The /update/json and /update/csv URLs were restored to aid
in the migration of existing clients. (yonik)
* SOLR-3691: SimplePostTool: Mode for crawling/posting web pages
See http://wiki.apache.org/solr/ExtractingRequestHandler for examples (janhoy)
* SOLR-3707: Upgrade Solr to Tika 1.2 (janhoy)
================== 4.0.0-BETA ===================
@ -271,7 +300,6 @@ Other Changes
Also, the configuration itself can be passed using the "dataConfig" parameter rather than
using a file (this previously worked in debug mode only). When configuration errors are
encountered, the error message is returned in XML format. (James Dyer)
* SOLR-3439: Make SolrCell easier to use out of the box. Also improves "/browse" to display
rich-text documents correctly, along with facets for author and content_type.
With the new "content" field, highlighting of body is supported. See also SOLR-3672 for

View File

@ -310,12 +310,11 @@ Copyright 2004 Sun Microsystems, Inc. (Rome JAR)
Copyright 2002-2008 by John Cowan (TagSoup -- http://ccil.org/~cowan/XML/tagsoup/)
Copyright (C) 1999-2007 Shigeru Chiba. All Rights Reserved.
(Javassist, MPL licensed: http://www.csg.ci.i.u-tokyo.ac.jp/~chiba/javassist/)
Copyright (C) 1994-2007 by the Xiph.org Foundation, http://www.xiph.org/ (OggVorbis)
Scannotation (C) Bill Burke
Copyright 2012 Kohei Taketa juniversalchardet (http://code.google.com/p/juniversalchardet/)
Lasse Collin and others, XZ for Java (http://tukaani.org/xz/java.html)
=========================================================================
== Language Detection Notices ==

View File

@ -386,8 +386,9 @@
<tarfileset dir="."
prefix="${fullnamever}"
includes="LICENSE.txt NOTICE.txt CHANGES.txt README.txt example/**
client/README.txt client/ruby/solr-ruby/** contrib/**/lib/**
contrib/**/README.txt licenses/**"
client/README.txt client/ruby/solr-ruby/**
contrib/**/lib/** contrib/**/README.txt
licenses/**"
excludes="lib/README.committers.txt **/data/ **/logs/*
**/classes/ **/*.sh **/ivy.xml **/build.xml
**/bin/ **/*.iml **/*.ipr **/*.iws **/pom.xml
@ -401,7 +402,9 @@
includes="example/**/*.sh example/**/bin/" />
<tarfileset dir="."
prefix="${fullnamever}"
includes="dist/*.jar dist/*.war dist/solrj-lib/*"
includes="dist/*.jar dist/*.war
dist/solrj-lib/*
dist/test-framework/**"
excludes="**/*.tgz **/*.zip **/*.md5 **/*src*.jar **/*docs*.jar **/*.sha1" />
<tarfileset dir="${dest}/docs"
prefix="${fullnamever}/docs" />

View File

@ -193,7 +193,7 @@
<property name="lucenedocs" location="${common.dir}/build/docs"/>
<!-- dependency to ensure all lucene javadocs are present -->
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial"/>
<target name="lucene-javadocs" depends="javadocs-lucene-core,javadocs-analyzers-common,javadocs-analyzers-icu,javadocs-analyzers-kuromoji,javadocs-analyzers-phonetic,javadocs-analyzers-smartcn,javadocs-analyzers-morfologik,javadocs-analyzers-stempel,javadocs-analyzers-uima,javadocs-suggest,javadocs-grouping,javadocs-queries,javadocs-queryparser,javadocs-highlighter,javadocs-memory,javadocs-misc,javadocs-spatial,javadocs-test-framework"/>
<!-- create javadocs for the current module -->
<target name="javadocs" depends="compile-core,define-lucene-javadoc-url,lucene-javadocs">

View File

@ -20,36 +20,36 @@
<info organisation="org.apache.solr" module="extraction"/>
<dependencies>
<!-- Tika JARs -->
<dependency org="org.apache.tika" name="tika-core" rev="1.1" transitive="false"/>
<dependency org="org.apache.tika" name="tika-parsers" rev="1.1" transitive="false"/>
<!-- Tika dependencies - see http://tika.apache.org/1.1/gettingstarted.html#Using_Tika_as_a_Maven_dependency -->
<dependency org="org.apache.tika" name="tika-core" rev="1.2" transitive="false"/>
<dependency org="org.apache.tika" name="tika-parsers" rev="1.2" transitive="false"/>
<!-- Tika dependencies - see http://tika.apache.org/1.2/gettingstarted.html#Using_Tika_as_a_Maven_dependency -->
<!-- When upgrading Tika, upgrade dependencies versions and add any new ones
(except slf4j-api, commons-codec, commons-logging, geronimo-stax-api_1.0_spec) -->
<dependency org="org.gagravarr" name="vorbis-java-tika" rev="0.1" transitive="false"/>
<dependency org="org.gagravarr" name="vorbis-java-core" rev="0.1" transitive="false"/>
<dependency org="edu.ucar" name="netcdf" rev="4.2-min" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-core" rev="0.7" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-dom" rev="0.7" transitive="false"/>
<dependency org="org.apache.commons" name="commons-compress" rev="1.3" transitive="false"/>
<dependency org="org.apache.pdfbox" name="pdfbox" rev="1.6.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="fontbox" rev="1.6.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="jempbox" rev="1.6.0" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-core" rev="0.7.2" transitive="false"/>
<dependency org="org.apache.james" name="apache-mime4j-dom" rev="0.7.2" transitive="false"/>
<dependency org="org.apache.commons" name="commons-compress" rev="1.4.1" transitive="false"/>
<dependency org="org.apache.pdfbox" name="pdfbox" rev="1.7.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="fontbox" rev="1.7.0" transitive="false"/>
<dependency org="org.apache.pdfbox" name="jempbox" rev="1.7.0" transitive="false"/>
<dependency org="org.bouncycastle" name="bcmail-jdk15" rev="1.45" transitive="false"/>
<dependency org="org.bouncycastle" name="bcprov-jdk15" rev="1.45" transitive="false"/>
<dependency org="org.apache.poi" name="poi" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi-scratchpad" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="3.8-beta5" transitive="false"/>
<dependency org="org.apache.poi" name="poi" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-scratchpad" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8" transitive="false"/>
<dependency org="org.apache.poi" name="poi-ooxml-schemas" rev="3.8" transitive="false"/>
<dependency org="org.apache.xmlbeans" name="xmlbeans" rev="2.3.0" transitive="false"/>
<dependency org="dom4j" name="dom4j" rev="1.6.1" transitive="false"/>
<dependency org="org.ccil.cowan.tagsoup" name="tagsoup" rev="1.2.1" transitive="false"/>
<dependency org="com.googlecode.mp4parser" name="isoparser" rev="1.0-beta-5" transitive="false"/>
<dependency org="net.sf.scannotation" name="scannotation" rev="1.0.2" transitive="false"/>
<dependency org="javassist" name="javassist" rev="3.6.0.GA" transitive="false"/>
<dependency org="com.googlecode.mp4parser" name="isoparser" rev="1.0-RC-1" transitive="false"/>
<dependency org="com.drewnoakes" name="metadata-extractor" rev="2.4.0-beta-1" transitive="false"/>
<dependency org="de.l3s.boilerpipe" name="boilerpipe" rev="1.1.0" transitive="false"/>
<dependency org="rome" name="rome" rev="0.9" transitive="false"/>
<dependency org="jdom" name="jdom" rev="1.0" transitive="false"/>
<dependency org="com.googlecode.juniversalchardet" name="juniversalchardet" rev="1.0.3" transitive="false"/>
<dependency org="org.tukaani" name="xz" rev="1.0" transitive="false"/>
<!-- Other ExtractingRequestHandler dependencies -->
<dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
<dependency org="xerces" name="xercesImpl" rev="2.9.1" transitive="false"/>


@@ -64,8 +64,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Author", "extractedAuthor",
"fmap.content", "extractedContent",
"literal.id", "one",
@@ -81,6 +80,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.Author", "extractedAuthor",
"fmap.language", "extractedLanguage",
"literal.id", "two",
"uprefix", "ignored_",
"fmap.content", "extractedContent",
"fmap.Last-Modified", "extractedDate"
);
@@ -136,6 +136,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Author", "extractedAuthor",
"literal.id", "three",
"uprefix", "ignored_",
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Last-Modified", "extractedDate"
@@ -206,6 +207,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.Author", "extractedAuthor",
"fmap.content", "extractedContent",
"literal.id", "one",
"uprefix", "ignored_",
"fmap.language", "extractedLanguage",
"literal.extractionLiteralMV", "one",
"literal.extractionLiteralMV", "two",
@@ -374,9 +376,8 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
loadLocal("extraction/arabic.pdf", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"fmap.Author", "extractedAuthor",
"uprefix", "ignored_",
"fmap.content", "wdf_nocase",
"literal.id", "one",
"fmap.Last-Modified", "extractedDate");
@@ -404,8 +405,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
loadLocal("extraction/password-is-solrcell.docx", "fmap.created", "extractedDate", "fmap.producer", "extractedProducer",
"fmap.creator", "extractedCreator", "fmap.Keywords", "extractedKeywords",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Author", "extractedAuthor",
"fmap.content", "wdf_nocase",
"literal.id", "one",
@@ -462,8 +462,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Last-Modified", "extractedDate");
// Here the literal value should override the Tika-parsed title:
@@ -478,8 +477,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Last-Modified", "extractedDate");
// Here we mimic the old behaviour where literals are added, not overridden
@@ -498,8 +496,7 @@ public class ExtractingRequestHandlerTest extends SolrTestCaseJ4 {
"fmap.content", "extractedContent",
"fmap.language", "extractedLanguage",
"fmap.Creation-Date", "extractedDate",
"fmap.AAPL:Keywords", "ignored_a",
"fmap.xmpTPg:NPages", "ignored_a",
"uprefix", "ignored_",
"fmap.Last-Modified", "extractedDate");
assertU(commit());
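
All of the test hunks above make the same substitution: instead of hand-mapping each unknown Tika field (fmap.AAPL:Keywords, fmap.xmpTPg:NPages) to ignored_a, the request sets uprefix=ignored_, which prefixes any extracted field that is not in the schema so it falls into the ignored_* dynamic field. A hedged SolrJ sketch of the same request shape (the server URL and file name are invented; verify addFile's signature against your SolrJ version):

import java.io.File;

import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.ContentStreamUpdateRequest;

public class ExtractWithUprefix {
  public static void main(String[] args) throws Exception {
    HttpSolrServer server = new HttpSolrServer("http://localhost:8983/solr"); // assumed URL
    ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/extract");
    req.addFile(new File("some-document.pdf"), "application/pdf"); // hypothetical file
    req.setParam("literal.id", "one");
    req.setParam("fmap.content", "extractedContent");
    // Any Tika field with no schema match is renamed, e.g.
    // AAPL:Keywords -> ignored_AAPL:Keywords, and caught by the ignored_* dynamic field.
    req.setParam("uprefix", "ignored_");
    server.request(req);
    server.commit();
  }
}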


@@ -125,6 +125,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
}
cacheValue.refCnt--;
if (cacheValue.refCnt == 0 && cacheValue.doneWithDir) {
log.info("Closing directory:" + cacheValue.path);
directory.close();
byDirectoryCache.remove(directory);
byPathCache.remove(cacheValue.path);
@@ -194,6 +195,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
byDirectoryCache.put(directory, newCacheValue);
byPathCache.put(fullPath, newCacheValue);
log.info("return new directory for " + fullPath + " forceNew:" + forceNew);
} else {
cacheValue.refCnt++;
}
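
Both hunks deal with the factory's reference counting: get() hands the same Directory back to later callers (refCnt++), and release() closes it only once the count reaches zero and doneWithDirectory() has been called. A self-contained sketch of that lifecycle, using illustrative names rather than Solr's actual API:

import java.io.Closeable;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

// Illustrative ref-counted cache in the spirit of CachingDirectoryFactory;
// not Solr code, just the pattern the hunks above rely on.
public class RefCountedCache<T extends Closeable> {
  private static class Entry<V> {
    V value;
    int refCnt = 1;   // the caller that created it holds one reference
    boolean doneWith; // owner has said the value may go away
  }

  private final Map<String, Entry<T>> byPath = new HashMap<String, Entry<T>>();

  public synchronized T get(String path, T freshValue) {
    Entry<T> e = byPath.get(path);
    if (e == null) {
      e = new Entry<T>();
      e.value = freshValue; // first caller supplies the instance
      byPath.put(path, e);
    } else {
      e.refCnt++;           // later callers share it
    }
    return e.value;
  }

  public synchronized void doneWith(String path) {
    byPath.get(path).doneWith = true;
  }

  public synchronized void release(String path) throws IOException {
    Entry<T> e = byPath.get(path);
    e.refCnt--;
    // Mirrors the refCnt == 0 && doneWithDir check above: close only when
    // nobody holds a reference and the owner has marked it done.
    if (e.refCnt == 0 && e.doneWith) {
      e.value.close();
      byPath.remove(path);
    }
  }
}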


@@ -1554,7 +1554,7 @@ public final class SolrCore implements SolrInfoMBean {
} catch (Throwable e) {
// do not allow decref() operations to fail since they are typically called in finally blocks
// and throwing another exception would be very unexpected.
SolrException.log(log, "Error closing searcher:", e);
SolrException.log(log, "Error closing searcher:" + this, e);
}
}
};


@@ -29,7 +29,7 @@ public class CSVRequestHandler extends UpdateRequestHandler {
public void init(NamedList args) {
super.init(args);
setAssumeContentType("application/csv");
log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
// log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
}
//////////////////////// SolrInfoMBeans methods //////////////////////


@@ -29,7 +29,7 @@ public class JsonUpdateRequestHandler extends UpdateRequestHandler {
public void init(NamedList args) {
super.init(args);
setAssumeContentType("application/json");
log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
// log.warn("Using deprecated class: "+this.getClass().getSimpleName()+" -- replace with UpdateRequestHandler");
}
//////////////////////// SolrInfoMBeans methods //////////////////////


@@ -384,7 +384,7 @@ public class SnapPuller {
// may be closed
core.getDirectoryFactory().doneWithDirectory(oldDirectory);
}
doCommit();
doCommit(isFullCopyNeeded);
}
replicationStartTime = 0;
@@ -533,11 +533,11 @@ public class SnapPuller {
return sb;
}
private void doCommit() throws IOException {
private void doCommit(boolean isFullCopyNeeded) throws IOException {
SolrQueryRequest req = new LocalSolrQueryRequest(solrCore,
new ModifiableSolrParams());
// reboot the writer on the new index and get a new searcher
solrCore.getUpdateHandler().newIndexWriter(true);
solrCore.getUpdateHandler().newIndexWriter(isFullCopyNeeded);
try {
// first try to open an NRT searcher so that the new


@@ -182,8 +182,8 @@ abstract class AbstractStatsValues<T> implements StatsValues {
for (Map.Entry<String, StatsValues> e2 : entry.getValue().entrySet()) {
nl2.add(e2.getKey(), e2.getValue().getStatsValues());
}
res.add(FACETS, nl);
}
res.add(FACETS, nl);
return res;
}


@@ -74,8 +74,7 @@ public final class DefaultSolrCoreState extends SolrCoreState {
}
if (indexWriter == null) {
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2",
false, false);
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", false);
}
if (refCntWriter == null) {
refCntWriter = new RefCounted<IndexWriter>(indexWriter) {
@@ -113,15 +112,25 @@
try {
if (indexWriter != null) {
if (!rollback) {
try {
log.info("Closing old IndexWriter... core=" + coreName);
indexWriter.close();
} catch (Throwable t) {
SolrException.log(log, "Error closing old IndexWriter. core=" + coreName, t);
SolrException.log(log, "Error closing old IndexWriter. core="
+ coreName, t);
}
} else {
try {
log.info("Rollback old IndexWriter... core=" + coreName);
indexWriter.rollback();
} catch (Throwable t) {
SolrException.log(log, "Error rolling back old IndexWriter. core="
+ coreName, t);
}
}
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2",
false, true);
}
indexWriter = createMainIndexWriter(core, "DirectUpdateHandler2", true);
log.info("New IndexWriter is ready to be used.");
// we need to null this so it picks up the new writer next get call
refCntWriter = null;
@@ -174,14 +183,12 @@
@Override
public synchronized void rollbackIndexWriter(SolrCore core) throws IOException {
indexWriter.rollback();
newIndexWriter(core, true);
}
protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name,
boolean removeAllExisting, boolean forceNewDirectory) throws IOException {
protected SolrIndexWriter createMainIndexWriter(SolrCore core, String name, boolean forceNewDirectory) throws IOException {
return new SolrIndexWriter(name, core.getNewIndexDir(),
core.getDirectoryFactory(), removeAllExisting, core.getSchema(),
core.getDirectoryFactory(), false, core.getSchema(),
core.getSolrConfig().indexConfig, core.getDeletionPolicy(), core.getCodec(), forceNewDirectory);
}


@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update;
import org.apache.solr.common.util.FastOutputStream;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
/** @lucene.internal */
public class MemOutputStream extends FastOutputStream {
public List<byte[]> buffers = new LinkedList<byte[]>();
public MemOutputStream(byte[] tempBuffer) {
super(null, tempBuffer, 0);
}
@Override
public void flush(byte[] arr, int offset, int len) throws IOException {
if (arr == buf && offset==0 && len==buf.length) {
buffers.add(buf); // steal the buffer
buf = new byte[8192];
} else if (len > 0) {
byte[] newBuf = new byte[len];
System.arraycopy(arr, offset, newBuf, 0, len);
buffers.add(newBuf);
}
}
public void writeAll(FastOutputStream fos) throws IOException {
for (byte[] buffer : buffers) {
fos.write(buffer);
}
if (pos > 0) {
fos.write(buf, 0, pos);
}
}
}
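
This is the same MemOutputStream that a hunk further down removes from the transaction log code; it now lives in its own file. It grows by stealing full 8 KB buffers into the buffers list rather than reallocating one ever-larger array, and writeAll() replays the chunks plus the partially filled tail. A small usage sketch (it assumes FastOutputStream's single-argument OutputStream constructor):

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.solr.common.util.FastOutputStream;
import org.apache.solr.update.MemOutputStream;

public class MemOutputStreamDemo {
  public static void main(String[] args) throws IOException {
    MemOutputStream mem = new MemOutputStream(new byte[8192]);
    for (int i = 0; i < 100000; i++) {
      mem.write((byte) i); // full buffers are "stolen" into mem.buffers
    }
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    FastOutputStream out = new FastOutputStream(sink); // assumed constructor
    mem.writeAll(out); // replay every chunk, then the buffered tail
    out.flush();
    System.out.println(sink.size()); // 100000
  }
}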


@@ -141,6 +141,8 @@ public class SolrIndexWriter extends IndexWriter {
super.rollback();
} finally {
isClosed = true;
directoryFactory.release(getDirectory());
numCloses.incrementAndGet();
}
}


@@ -775,31 +775,3 @@ class ChannelFastInputStream extends FastInputStream {
}
class MemOutputStream extends FastOutputStream {
public List<byte[]> buffers = new LinkedList<byte[]>();
public MemOutputStream(byte[] tempBuffer) {
super(null, tempBuffer, 0);
}
@Override
public void flush(byte[] arr, int offset, int len) throws IOException {
if (arr == buf && offset==0 && len==buf.length) {
buffers.add(buf); // steal the buffer
buf = new byte[8192];
} else if (len > 0) {
byte[] newBuf = new byte[len];
System.arraycopy(arr, offset, newBuf, 0, len);
buffers.add(newBuf);
}
}
public void writeAll(FastOutputStream fos) throws IOException {
for (byte[] buffer : buffers) {
fos.write(buffer);
}
if (pos > 0) {
fos.write(buf, 0, pos);
}
}
}


@@ -28,7 +28,7 @@ public class FastWriter extends Writer {
// it won't cause double buffering.
private static final int BUFSIZE = 8192;
protected final Writer sink;
protected final char[] buf;
protected char[] buf;
protected int pos;
public FastWriter(Writer w) {
@@ -69,42 +69,64 @@
}
@Override
public void write(char cbuf[], int off, int len) throws IOException {
public void write(char arr[], int off, int len) throws IOException {
for(;;) {
int space = buf.length - pos;
if (len < space) {
System.arraycopy(cbuf, off, buf, pos, len);
if (len <= space) {
System.arraycopy(arr, off, buf, pos, len);
pos += len;
} else if (len<BUFSIZE) {
// if the data to write is small enough, buffer it.
System.arraycopy(cbuf, off, buf, pos, space);
flush(buf, 0, buf.length);
pos = len-space;
System.arraycopy(cbuf, off+space, buf, 0, pos);
} else {
return;
} else if (len > buf.length) {
if (pos>0) {
flush(buf,0,pos); // flush
pos=0;
}
// don't buffer, just write to sink
flush(cbuf, off, len);
flush(arr, off, len);
return;
}
// buffer is too big to fit in the free space, but
// not big enough to warrant writing on its own.
// write whatever we can fit, then flush and iterate.
System.arraycopy(arr, off, buf, pos, space);
flush(buf, 0, buf.length);
pos = 0;
off += space;
len -= space;
}
}
@Override
public void write(String str, int off, int len) throws IOException {
for(;;) {
int space = buf.length - pos;
if (len < space) {
if (len <= space) {
str.getChars(off, off+len, buf, pos);
pos += len;
} else if (len<BUFSIZE) {
// if the data to write is small enough, buffer it.
str.getChars(off, off+space, buf, pos);
flush(buf, 0, buf.length);
str.getChars(off+space, off+len, buf, 0);
pos = len-space;
} else {
return;
} else if (len > buf.length) {
if (pos>0) {
flush(buf,0,pos); // flush
pos=0;
}
// don't buffer, just write to sink
flush(str, off, len);
return;
}
// buffer is too big to fit in the free space, but
// not big enough to warrant writing on its own.
// write whatever we can fit, then flush and iterate.
str.getChars(off, off+space, buf, pos);
flush(buf, 0, buf.length);
pos = 0;
off += space;
len -= space;
}
}
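
The rewritten loops fix the old version's handling of mid-sized writes: a chunk that fits in the free space is buffered and the method returns; a chunk larger than the whole buffer flushes whatever is pending and goes straight to the sink; anything in between fills the free space, flushes, and iterates instead of assuming a single flush is enough. A usage sketch sized to hit all three paths against the 8192-char buffer (FastWriter's package is assumed to be org.apache.solr.common.util):

import java.io.IOException;
import java.io.StringWriter;
import java.util.Arrays;

import org.apache.solr.common.util.FastWriter;

public class FastWriterDemo {
  public static void main(String[] args) throws IOException {
    StringWriter sink = new StringWriter();
    FastWriter fw = new FastWriter(sink);

    char[] small = new char[100];   // fits in free space: buffered, no I/O yet
    char[] mid   = new char[8192];  // fits in the buffer but not the free space:
                                    // fill, flush, iterate
    char[] large = new char[20000]; // larger than the buffer: written through

    Arrays.fill(small, 'a');
    Arrays.fill(mid, 'b');
    Arrays.fill(large, 'c');

    fw.write(small, 0, small.length);
    fw.write(mid, 0, mid.length);
    fw.write(large, 0, large.length);
    fw.flush(); // push any buffered tail to the sink

    System.out.println(sink.toString().length()); // 28292
  }
}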

File diff suppressed because it is too large.


@@ -0,0 +1,49 @@
<html>
<head>
<title>Welcome to Solr</title>
</head>
<body>
<p>
Here is some text
</p>
<div>Here is some text in a div</div>
<div>This has a <a href="http://www.apache.org">link</a>.</div>
<a href="#news">News</a>
<ul class="minitoc">
<li>
<a href="#03+October+2008+-+Solr+Logo+Contest">03 October 2008 - Solr Logo Contest</a>
</li>
<li>
<a href="#15+September+2008+-+Solr+1.3.0+Available">15 September 2008 - Solr 1.3.0 Available</a>
</li>
<li>
<a href="#28+August+2008+-+Lucene%2FSolr+at+ApacheCon+New+Orleans">28 August 2008 - Lucene/Solr at ApacheCon New Orleans</a>
</li>
<li>
<a href="#03+September+2007+-+Lucene+at+ApacheCon+Atlanta">03 September 2007 - Lucene at ApacheCon Atlanta</a>
</li>
<li>
<a href="#06+June+2007%3A+Release+1.2+available">06 June 2007: Release 1.2 available</a>
</li>
<li>
<a href="#17+January+2007%3A+Solr+graduates+from+Incubator">17 January 2007: Solr graduates from Incubator</a>
</li>
<li>
<a href="#22+December+2006%3A+Release+1.1.0+available">22 December 2006: Release 1.1.0 available</a>
</li>
<li>
<a href="#15+August+2006%3A+Solr+at+ApacheCon+US">15 August 2006: Solr at ApacheCon US</a>
</li>
<li>
<a href="#21+April+2006%3A+Solr+at+ApacheCon">21 April 2006: Solr at ApacheCon</a>
</li>
<li>
<a href="#21+February+2006%3A+nightly+builds">21 February 2006: nightly builds</a>
</li>
<li>
<a href="#17+January+2006%3A+Solr+Joins+Apache+Incubator">17 January 2006: Solr Joins Apache Incubator</a>
</li>
</ul>
</body>
</html>


@@ -0,0 +1,3 @@
Example text document
This is a simple example for a plain text document, indexed to Solr


@@ -54,7 +54,7 @@
-->
<maxBufferedDocs>10</maxBufferedDocs>
<mergePolicy class="org.apache.lucene.index.LogDocMergePolicy"/>
<lockType>single</lockType>
<lockType>native</lockType>
<unlockOnStartup>true</unlockOnStartup>
</indexConfig>
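
The test config moves from the JVM-local "single" lock to the "native" lock, which uses OS-level file locking and therefore also guards against a second process opening a writer on the same index. Roughly, Solr's lockType values map onto Lucene lock factories as below (a sketch against the Lucene 4.x file-based API; the index path is illustrative):

import java.io.File;
import java.io.IOException;

import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.NativeFSLockFactory;

public class LockTypeDemo {
  public static void main(String[] args) throws IOException {
    // "single" -> SingleInstanceLockFactory (in-process only)
    // "simple" -> SimpleFSLockFactory      (lock file on disk)
    // "native" -> NativeFSLockFactory      (java.nio OS-level lock)
    FSDirectory dir = FSDirectory.open(new File("build/test-index"),
        new NativeFSLockFactory());
    System.out.println("lock factory: " + dir.getLockFactory());
    dir.close();
  }
}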


@@ -228,32 +228,35 @@ public class StatsComponentTest extends AbstractSolrTestCase {
}
public void doTestFacetStatisticsResult(String f) throws Exception {
assertU(adoc("id", "1", f, "10", "active_s", "true"));
assertU(adoc("id", "2", f, "20", "active_s", "true"));
assertU(adoc("id", "3", f, "30", "active_s", "false"));
assertU(adoc("id", "4", f, "40", "active_s", "false"));
assertU(adoc("id", "1", f, "10", "active_s", "true", "other_s", "foo"));
assertU(adoc("id", "2", f, "20", "active_s", "true", "other_s", "bar"));
assertU(adoc("id", "3", f, "30", "active_s", "false", "other_s", "foo"));
assertU(adoc("id", "4", f, "40", "active_s", "false", "other_s", "foo"));
assertU(commit());
assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s","indent","true")
, "//lst[@name='true']/double[@name='min'][.='10.0']"
, "//lst[@name='true']/double[@name='max'][.='20.0']"
, "//lst[@name='true']/double[@name='sum'][.='30.0']"
, "//lst[@name='true']/long[@name='count'][.='2']"
, "//lst[@name='true']/long[@name='missing'][.='0']"
, "//lst[@name='true']/double[@name='sumOfSquares'][.='500.0']"
, "//lst[@name='true']/double[@name='mean'][.='15.0']"
, "//lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']"
final String pre = "//lst[@name='stats_fields']/lst[@name='"+f+"']/lst[@name='facets']/lst[@name='active_s']";
assertQ("test value for active_s=true", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s","stats.facet","other_s","indent","true")
, "*[count("+pre+")=1]"
, pre+"/lst[@name='true']/double[@name='min'][.='10.0']"
, pre+"/lst[@name='true']/double[@name='max'][.='20.0']"
, pre+"/lst[@name='true']/double[@name='sum'][.='30.0']"
, pre+"/lst[@name='true']/long[@name='count'][.='2']"
, pre+"/lst[@name='true']/long[@name='missing'][.='0']"
, pre+"/lst[@name='true']/double[@name='sumOfSquares'][.='500.0']"
, pre+"/lst[@name='true']/double[@name='mean'][.='15.0']"
, pre+"/lst[@name='true']/double[@name='stddev'][.='7.0710678118654755']"
);
assertQ("test value for active_s=false", req("q","*:*", "stats","true", "stats.field",f, "stats.facet","active_s")
, "//lst[@name='false']/double[@name='min'][.='30.0']"
, "//lst[@name='false']/double[@name='max'][.='40.0']"
, "//lst[@name='false']/double[@name='sum'][.='70.0']"
, "//lst[@name='false']/long[@name='count'][.='2']"
, "//lst[@name='false']/long[@name='missing'][.='0']"
, "//lst[@name='false']/double[@name='sumOfSquares'][.='2500.0']"
, "//lst[@name='false']/double[@name='mean'][.='35.0']"
, "//lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']"
, pre+"/lst[@name='false']/double[@name='min'][.='30.0']"
, pre+"/lst[@name='false']/double[@name='max'][.='40.0']"
, pre+"/lst[@name='false']/double[@name='sum'][.='70.0']"
, pre+"/lst[@name='false']/long[@name='count'][.='2']"
, pre+"/lst[@name='false']/long[@name='missing'][.='0']"
, pre+"/lst[@name='false']/double[@name='sumOfSquares'][.='2500.0']"
, pre+"/lst[@name='false']/double[@name='mean'][.='35.0']"
, pre+"/lst[@name='false']/double[@name='stddev'][.='7.0710678118654755']"
);
}


@@ -0,0 +1,237 @@
package org.apache.solr.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.util.SimplePostTool.PageFetcher;
import org.apache.solr.util.SimplePostTool.PageFetcherResult;
import org.junit.Before;
import org.junit.Test;
public class SimplePostToolTest extends SolrTestCaseJ4 {
SimplePostTool t_file, t_file_auto, t_file_rec, t_web, t_test;
PageFetcher pf;
@Before
public void setUp() throws Exception {
super.setUp();
String[] args = {"-"};
System.setProperty("data", "files");
t_file = SimplePostTool.parseArgsAndInit(args);
System.setProperty("auto", "yes");
t_file_auto = SimplePostTool.parseArgsAndInit(args);
System.setProperty("recursive", "yes");
t_file_rec = SimplePostTool.parseArgsAndInit(args);
System.setProperty("data", "web");
t_web = SimplePostTool.parseArgsAndInit(args);
System.setProperty("params", "param1=foo&param2=bar");
t_test = SimplePostTool.parseArgsAndInit(args);
pf = new MockPageFetcher();
SimplePostTool.pageFetcher = pf;
SimplePostTool.mockMode = true;
}
@Test
public void testParseArgsAndInit() {
assertEquals(false, t_file.auto);
assertEquals(true, t_file_auto.auto);
assertEquals(0, t_file_auto.recursive);
assertEquals(999, t_file_rec.recursive);
assertEquals(true, t_file.commit);
assertEquals(false, t_file.optimize);
assertEquals(null, t_file.out);
assertEquals(1, t_web.recursive);
assertEquals(10, t_web.delay);
assertNotNull(t_test.solrUrl);
}
@Test
public void testNormalizeUrlEnding() {
assertEquals("http://example.com", SimplePostTool.normalizeUrlEnding("http://example.com/"));
assertEquals("http://example.com", SimplePostTool.normalizeUrlEnding("http://example.com/#foo?bar=baz"));
assertEquals("http://example.com/index.html", SimplePostTool.normalizeUrlEnding("http://example.com/index.html#hello"));
}
@Test
public void testComputeFullUrl() throws MalformedURLException {
assertEquals("http://example.com/index.html", t_web.computeFullUrl(new URL("http://example.com/"), "/index.html"));
assertEquals("http://example.com/index.html", t_web.computeFullUrl(new URL("http://example.com/foo/bar/"), "/index.html"));
assertEquals("http://example.com/fil.html", t_web.computeFullUrl(new URL("http://example.com/foo.htm?baz#hello"), "fil.html"));
// TODO: How to know what is the base if URL path ends with "foo"??
// assertEquals("http://example.com/fil.html", t_web.computeFullUrl(new URL("http://example.com/foo?baz#hello"), "fil.html"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "fil.jpg"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "mailto:hello@foo.bar"));
assertEquals(null, t_web.computeFullUrl(new URL("http://example.com/"), "ftp://server/file"));
}
@Test
public void testTypeSupported() {
assertTrue(t_web.typeSupported("application/pdf"));
assertTrue(t_web.typeSupported("text/xml"));
assertFalse(t_web.typeSupported("text/foo"));
t_web.fileTypes = "doc,xls,ppt";
t_web.globFileFilter = t_web.getFileFilterFromFileTypes(t_web.fileTypes);
assertFalse(t_web.typeSupported("application/pdf"));
assertTrue(t_web.typeSupported("application/msword"));
}
@Test
public void testIsOn() {
assertTrue(SimplePostTool.isOn("true"));
assertTrue(SimplePostTool.isOn("1"));
assertFalse(SimplePostTool.isOn("off"));
}
@Test
public void testAppendParam() {
assertEquals("http://example.com?foo=bar", SimplePostTool.appendParam("http://example.com", "foo=bar"));
assertEquals("http://example.com/?a=b&foo=bar", SimplePostTool.appendParam("http://example.com/?a=b", "foo=bar"));
}
@Test
public void testAppendUrlPath() throws MalformedURLException {
assertEquals(new URL("http://example.com/a?foo=bar"), SimplePostTool.appendUrlPath(new URL("http://example.com?foo=bar"), "/a"));
}
@Test
public void testGuessType() {
File f = new File("foo.doc");
assertEquals("application/msword", SimplePostTool.guessType(f));
f = new File("foobar");
assertEquals(null, SimplePostTool.guessType(f));
}
@Test
public void testDoFilesMode() {
t_file_auto.recursive = 0;
File dir = getFile("exampledocs");
int num = t_file_auto.postFiles(new File[] {dir}, 0, null, null);
assertEquals(2, num);
}
@Test
public void testDoWebMode() {
// Uses mock pageFetcher
t_web.delay = 0;
t_web.recursive = 5;
int num = t_web.postWebPages(new String[] {"http://example.com/#removeme"}, 0, null);
assertEquals(5, num);
t_web.recursive = 1;
num = t_web.postWebPages(new String[] {"http://example.com/"}, 0, null);
assertEquals(3, num);
// Without respecting robots.txt
SimplePostTool.pageFetcher.robotsCache.clear();
t_web.recursive = 5;
num = t_web.postWebPages(new String[] {"http://example.com/#removeme"}, 0, null);
assertEquals(6, num);
}
@Test
public void testRobotsExclusion() throws MalformedURLException {
assertFalse(SimplePostTool.pageFetcher.isDisallowedByRobots(new URL("http://example.com/")));
assertTrue(SimplePostTool.pageFetcher.isDisallowedByRobots(new URL("http://example.com/disallowed")));
assertTrue("There should be two entries parsed from robots.txt", SimplePostTool.pageFetcher.robotsCache.get("example.com").size() == 2);
}
class MockPageFetcher extends PageFetcher {
HashMap<String,String> htmlMap = new HashMap<String,String>();
HashMap<String,Set<URL>> linkMap = new HashMap<String,Set<URL>>();
public MockPageFetcher() throws IOException {
(new SimplePostTool()).super();
htmlMap.put("http://example.com", "<html><body><a href=\"http://example.com/page1\">page1</a><a href=\"http://example.com/page2\">page2</a></body></html>");
htmlMap.put("http://example.com/index.html", "<html><body><a href=\"http://example.com/page1\">page1</a><a href=\"http://example.com/page2\">page2</a></body></html>");
htmlMap.put("http://example.com/page1", "<html><body><a href=\"http://example.com/page1/foo\"></body></html>");
htmlMap.put("http://example.com/page1/foo", "<html><body><a href=\"http://example.com/page1/foo/bar\"></body></html>");
htmlMap.put("http://example.com/page1/foo/bar", "<html><body><a href=\"http://example.com/page1\"></body></html>");
htmlMap.put("http://example.com/page2", "<html><body><a href=\"http://example.com/\"><a href=\"http://example.com/disallowed\"/></body></html>");
htmlMap.put("http://example.com/disallowed", "<html><body><a href=\"http://example.com/\"></body></html>");
Set<URL> s = new HashSet<URL>();
s.add(new URL("http://example.com/page1"));
s.add(new URL("http://example.com/page2"));
linkMap.put("http://example.com", s);
linkMap.put("http://example.com/index.html", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/page1/foo"));
linkMap.put("http://example.com/page1", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/page1/foo/bar"));
linkMap.put("http://example.com/page1/foo", s);
s = new HashSet<URL>();
s.add(new URL("http://example.com/disallowed"));
linkMap.put("http://example.com/page2", s);
// Simulate a robots.txt file with comments and a few disallows
StringBuilder sb = new StringBuilder();
sb.append("# Comments appear after the \"#\" symbol at the start of a line, or after a directive\n");
sb.append("User-agent: * # match all bots\n");
sb.append("Disallow: # This is void\n");
sb.append("Disallow: /disallow # Disallow this path\n");
sb.append("Disallow: /nonexistingpath # Disallow this path\n");
this.robotsCache.put("example.com", SimplePostTool.pageFetcher.
parseRobotsTxt(new ByteArrayInputStream(sb.toString().getBytes("UTF-8"))));
}
@Override
public PageFetcherResult readPageFromUrl(URL u) {
PageFetcherResult res = (new SimplePostTool()).new PageFetcherResult();
if (isDisallowedByRobots(u)) {
res.httpStatus = 403;
return res;
}
res.httpStatus = 200;
res.contentType = "text/html";
try {
res.content = htmlMap.get(u.toString()).getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException();
}
return res;
}
@Override
public Set<URL> getLinksFromWebPage(URL u, InputStream is, String type, URL postUrl) {
Set<URL> s = linkMap.get(SimplePostTool.normalizeUrlEnding(u.toString()));
if(s == null)
s = new HashSet<URL>();
return s;
}
}
}

Some files were not shown because too many files have changed in this diff.