LUCENE-5487: merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5487@1576473 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2014-03-11 19:03:45 +00:00
commit 2d88332e9d
55 changed files with 1065 additions and 161 deletions

View File

@ -18,6 +18,12 @@
-->
<project name="lucene-solr" default="-projecthelp" basedir=".">
<!-- Look for property definition in various *build.properties files -->
<property file="${user.home}/lucene.build.properties"/>
<property file="${user.home}/build.properties"/>
<property file="${basedir}/build.properties"/>
<property file="lucene/build.properties"/><!-- hack for Lucene users, clones Lucene's common-build.xml -->
<target name="-projecthelp">
<java fork="false" classname="org.apache.tools.ant.Main" taskname="-">
<arg value="-projecthelp"/>
@ -268,10 +274,6 @@
</target>
<target name="idea" depends="resolve" description="Setup IntelliJ IDEA configuration">
<!-- Look for property definition for ${idea.jdk} in various *build.properties files -->
<property file="lucene/build.properties"/> <!-- Look in the current project first -->
<property file="${user.home}/lucene.build.properties"/>
<property file="${user.home}/build.properties"/>
<condition property="idea.jdk.is.set">
<isset property="idea.jdk"/>
</condition>

View File

@ -92,7 +92,7 @@
<svn-checker failonmodifications="true"/>
</target>
<property name="svnkit.version" value="1.7.8"/>
<property name="svnkit.version" value="1.8.4"/>
<macrodef xmlns:ivy="antlib:org.apache.ivy.ant" name="svn-checker">
<attribute name="failonmodifications" default="true"/> <!-- false if file modifications are allowed -->
@ -107,8 +107,6 @@
import org.tmatesoft.svn.core.wc.*;
import org.apache.tools.ant.Project;
def RECOMMENDED_SVNKIT_18 = '1.8.2';
SVNClientManager manager = SVNClientManager.newInstance();
SVNStatusClient statusClient = manager.getStatusClient();
SVNWCClient wcClient = manager.getWCClient();
@ -124,11 +122,7 @@
def ec = ex.getErrorMessage().getErrorCode();
int code = ec.getCode();
int category = ec.getCategory();
if (code == SVNErrorCode.WC_UNSUPPORTED_FORMAT.getCode()) {
task.log('WARNING: Unsupported SVN working copy version! Disabling checks...', Project.MSG_WARN);
task.log('If your working copy is on version 1.8 already, please pass -Dsvnkit.version=' + RECOMMENDED_SVNKIT_18 + ' to successfully run checks.', Project.MSG_INFO);
return;
} else if (code == SVNErrorCode.WC_NOT_DIRECTORY.getCode() || code == SVNErrorCode.WC_NOT_FILE.getCode()) {
if (code == SVNErrorCode.WC_NOT_DIRECTORY.getCode() || code == SVNErrorCode.WC_NOT_FILE.getCode()) {
task.log('WARNING: Development directory is not an SVN checkout! Disabling checks...', Project.MSG_WARN);
return;
} else if (category == SVNErrorCode.WC_CATEGORY) {

View File

@ -147,15 +147,33 @@ Bug fixes
recursive affix application are driven correctly by continuation classes in the affix file.
(Robert Muir)
* LUCENE-5497: HunspellStemFilter properly handles escaped terms and affixes without conditions.
(Robert Muir)
* LUCENE-5505: HunspellStemFilter ignores BOM markers in dictionaries and handles varying
types of whitespace in SET/FLAG commands. (Robert Muir)
* LUCENE-5507: Fix HunspellStemFilter loading of dictionaries with large amounts of aliases
etc before the encoding declaration. (Robert Muir)
* LUCENE-5502: Fixed TermsFilter.equals that could return true for different
filters. (Igor Motov via Adrien Grand)
Test Framework
* LUCENE-5449: Rename _TestUtil and _TestHelper to remove the leading _.
* LUCENE-5501: Added random out-of-order collection testing (when the collector
supports it) to AssertingIndexSearcher. (Adrien Grand)
Build
* LUCENE-5463: RamUsageEstimator.(human)sizeOf(Object) is now a forbidden API.
(Adrien Grand, Robert Muir)
* LUCENE-5511: "ant precommit" / "ant check-svn-working-copy" now work again
with any working copy format (thanks to svnkit 1.8.4). (Uwe Schindler)
======================= Lucene 4.7.0 =======================
New Features
@ -188,7 +206,7 @@ New Features
AnalyzingInfixSuggester but boosts suggestions that matched tokens
with lower positions. (Remi Melisson via Mike McCandless)
* LUCENE-4399: When sorting by String (SortField.STRING), you can now
* LUCENE-5399: When sorting by String (SortField.STRING), you can now
specify whether missing values should be sorted first (the default),
using SortField.setMissingValue(SortField.STRING_FIRST), or last,
using SortField.setMissingValue(SortField.STRING_LAST). (Rob Muir,

View File

@ -35,12 +35,16 @@ import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
@ -54,6 +58,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
@ -154,21 +159,41 @@ public class Dictionary {
this.ignoreCase = ignoreCase;
this.needsInputCleaning = ignoreCase;
this.needsOutputCleaning = false; // set if we have an OCONV
// TODO: we really need to probably buffer this on disk since so many newer dictionaries
// (en_GB, hu_HU, etc) now have tons of AM lines (morph metadata) etc before they finally declare
// their encoding... but for now this large buffer is a workaround
BufferedInputStream buffered = new BufferedInputStream(affix, 65536);
buffered.mark(65536);
String encoding = getDictionaryEncoding(buffered);
buffered.reset();
CharsetDecoder decoder = getJavaEncoding(encoding);
readAffixFile(buffered, decoder);
flagLookup.add(new BytesRef()); // no flags -> ord 0
stripLookup.add(new BytesRef()); // no strip -> ord 0
IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
readDictionaryFiles(dictionaries, decoder, b);
words = b.finish();
File aff = File.createTempFile("affix", "aff", tempDir);
OutputStream out = new BufferedOutputStream(new FileOutputStream(aff));
InputStream aff1 = null;
InputStream aff2 = null;
try {
// copy contents of affix stream to temp file
final byte [] buffer = new byte [1024 * 8];
int len;
while ((len = affix.read(buffer)) > 0) {
out.write(buffer, 0, len);
}
out.close();
// pass 1: get encoding
aff1 = new BufferedInputStream(new FileInputStream(aff));
String encoding = getDictionaryEncoding(aff1);
// pass 2: parse affixes
CharsetDecoder decoder = getJavaEncoding(encoding);
aff2 = new BufferedInputStream(new FileInputStream(aff));
readAffixFile(aff2, decoder);
// read dictionary entries
IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
readDictionaryFiles(dictionaries, decoder, b);
words = b.finish();
aliases = null; // no longer needed
} finally {
IOUtils.closeWhileHandlingException(out, aff1, aff2);
aff.delete();
}
}
/**
@ -251,6 +276,10 @@ public class Dictionary {
LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
String line = null;
while ((line = reader.readLine()) != null) {
// ignore any BOM marker on first line
if (reader.getLineNumber() == 1 && line.startsWith("\uFEFF")) {
line = line.substring(1);
}
if (line.startsWith(ALIAS_KEY)) {
parseAlias(line);
} else if (line.startsWith(PREFIX_KEY)) {
@ -348,8 +377,10 @@ public class Dictionary {
String line = reader.readLine();
String ruleArgs[] = line.split("\\s+");
if (ruleArgs.length < 5) {
throw new ParseException("The affix file contains a rule with less than five elements", reader.getLineNumber());
// from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
// condition is optional
if (ruleArgs.length < 4) {
throw new ParseException("The affix file contains a rule with less than four elements: " + line, reader.getLineNumber());
}
char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
@ -370,7 +401,7 @@ public class Dictionary {
Arrays.sort(appendFlags);
}
String condition = ruleArgs[4];
String condition = ruleArgs.length > 4 ? ruleArgs[4] : ".";
// at least the gascon affix file has this issue
if (condition.startsWith("[") && !condition.endsWith("]")) {
condition = condition + "]";
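Per the hunspell manpage quoted above, the condition is the optional fifth element; defaulting it to "." makes a four-element rule match any stem. The optional-condition.aff fixture added later in this commit exercises exactly this path with the four-element rule `PFX A 0 re`, which is why TestOptionalCondition expects "rework" to stem to "work".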
@ -464,6 +495,9 @@ public class Dictionary {
return builder.finish();
}
/** pattern accepts optional BOM + SET + any whitespace */
final static Pattern ENCODING_PATTERN = Pattern.compile("^(\u00EF\u00BB\u00BF)?SET\\s+");
/**
* Parses the encoding specified in the affix file readable through the provided InputStream
@ -473,7 +507,7 @@ public class Dictionary {
* @throws IOException Can be thrown while reading from the InputStream
* @throws ParseException Thrown if the first non-empty non-comment line read from the file does not adhere to the format {@code SET <encoding>}
*/
private String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
static String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
final StringBuilder encoding = new StringBuilder();
for (;;) {
encoding.setLength(0);
@ -496,9 +530,10 @@ public class Dictionary {
}
continue;
}
if (encoding.length() > 4 && "SET ".equals(encoding.substring(0, 4))) {
// cleanup the encoding string, too (whitespace)
return encoding.substring(4).trim();
Matcher matcher = ENCODING_PATTERN.matcher(encoding);
if (matcher.find()) {
int last = matcher.end();
return encoding.substring(last).trim();
}
}
}
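
A note on the pattern above: getDictionaryEncoding scans the affix header byte-by-byte before any charset is known, so a UTF-8 byte order mark arrives as the three raw bytes 0xEF 0xBB 0xBF rather than as the single char U+FEFF; that is what the literal \u00EF\u00BB\u00BF in ENCODING_PATTERN tolerates. A minimal standalone sketch (class name hypothetical):

import java.nio.charset.StandardCharsets;

public class BomBytesDemo {
  public static void main(String[] args) {
    byte[] bom = "\uFEFF".getBytes(StandardCharsets.UTF_8); // EF BB BF
    StringBuilder raw = new StringBuilder();
    for (byte b : bom) {
      raw.append((char) (b & 0xFF)); // widen each raw byte to a char
    }
    // the prefix that ENCODING_PATTERN accepts before "SET"
    System.out.println(raw.toString().equals("\u00EF\u00BB\u00BF")); // true
  }
}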
@ -536,8 +571,12 @@ public class Dictionary {
* @param flagLine Line containing the flag information
* @return FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition
*/
private FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
String flagType = flagLine.substring(5);
static FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
String parts[] = flagLine.split("\\s+");
if (parts.length != 2) {
throw new IllegalArgumentException("Illegal FLAG specification: " + flagLine);
}
String flagType = parts[1];
if (NUM_FLAG_TYPE.equals(flagType)) {
return new NumFlagParsingStrategy();
@ -550,6 +589,24 @@ public class Dictionary {
throw new IllegalArgumentException("Unknown flag type: " + flagType);
}
final char FLAG_SEPARATOR = 0x1f; // flag separator after escaping
String unescapeEntry(String entry) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < entry.length(); i++) {
char ch = entry.charAt(i);
if (ch == '\\' && i+1 < entry.length()) {
sb.append(entry.charAt(i+1));
i++;
} else if (ch == '/') {
sb.append(FLAG_SEPARATOR);
} else {
sb.append(ch);
}
}
return sb.toString();
}
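
A runnable sketch (class name hypothetical) of what unescapeEntry produces for the `R2\/D2/A` entry from the escaped.dic fixture added in this commit: escaped slashes stay literal characters, while the bare '/' that introduces flags becomes the internal 0x1f separator.

public class UnescapeDemo {
  static final char FLAG_SEPARATOR = 0x1f;

  // copy of the unescapeEntry logic above, reproduced so the demo runs standalone
  static String unescapeEntry(String entry) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < entry.length(); i++) {
      char ch = entry.charAt(i);
      if (ch == '\\' && i + 1 < entry.length()) {
        sb.append(entry.charAt(i + 1));
        i++;
      } else if (ch == '/') {
        sb.append(FLAG_SEPARATOR);
      } else {
        sb.append(ch);
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    String out = unescapeEntry("R2\\/D2/A"); // the raw dictionary line R2\/D2/A
    System.out.println(out.equals("R2/D2" + FLAG_SEPARATOR + "A")); // true
  }
}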
/**
* Reads the dictionary file through the provided InputStreams, building up the words map
*
@ -570,8 +627,9 @@ public class Dictionary {
String line = lines.readLine(); // first line is number of entries (approximately, sometimes)
while ((line = lines.readLine()) != null) {
line = unescapeEntry(line);
if (needsInputCleaning) {
int flagSep = line.lastIndexOf('/');
int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
if (flagSep == -1) {
CharSequence cleansed = cleanInput(line, sb);
writer.write(cleansed.toString().getBytes(IOUtils.CHARSET_UTF_8));
@ -604,7 +662,7 @@ public class Dictionary {
scratch1.length = o1.length;
for (int i = scratch1.length - 1; i >= 0; i--) {
if (scratch1.bytes[scratch1.offset + i] == '/') {
if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR) {
scratch1.length = i;
break;
}
@ -615,7 +673,7 @@ public class Dictionary {
scratch2.length = o2.length;
for (int i = scratch2.length - 1; i >= 0; i--) {
if (scratch2.bytes[scratch2.offset + i] == '/') {
if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR) {
scratch2.length = i;
break;
}
@ -648,7 +706,7 @@ public class Dictionary {
String entry;
char wordForm[];
int flagSep = line.lastIndexOf('/');
int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
if (flagSep == -1) {
wordForm = NOFLAGS;
entry = line;
@ -738,7 +796,9 @@ public class Dictionary {
final int count = Integer.parseInt(ruleArgs[1]);
aliases = new String[count];
} else {
aliases[aliasCount++] = ruleArgs[1];
// an alias can map to no flags
String aliasValue = ruleArgs.length == 1 ? "" : ruleArgs[1];
aliases[aliasCount++] = aliasValue;
}
}
@ -753,7 +813,7 @@ public class Dictionary {
/**
* Abstraction of the process of parsing flags taken from the affix and dic files
*/
private static abstract class FlagParsingStrategy {
static abstract class FlagParsingStrategy {
/**
* Parses the given String into a single flag
@ -828,6 +888,9 @@ public class Dictionary {
}
StringBuilder builder = new StringBuilder();
if (rawFlags.length() % 2 == 1) {
throw new IllegalArgumentException("Invalid flags (should be even number of characters): " + rawFlags);
}
for (int i = 0; i < rawFlags.length(); i+=2) {
char cookedFlag = (char) ((int) rawFlags.charAt(i) + (int) rawFlags.charAt(i + 1));
builder.append(cookedFlag);
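
The pair-summing above appears to serve the `FLAG long` decoding: each flag is stored as two raw chars whose code points are added into a single cooked char, hence the new even-length check. A standalone sketch (class name hypothetical):

public class LongFlagDemo {
  public static void main(String[] args) {
    String rawFlags = "AABB"; // two "long" flags: AA and BB
    StringBuilder cooked = new StringBuilder();
    for (int i = 0; i < rawFlags.length(); i += 2) {
      // same arithmetic as above: sum each pair into one cooked char
      cooked.append((char) (rawFlags.charAt(i) + rawFlags.charAt(i + 1)));
    }
    // 'A'+'A' = 130 and 'B'+'B' = 132; note the sum is order-insensitive,
    // so "AB" and "BA" would cook to the same flag
    System.out.println((int) cooked.charAt(0) + " " + (int) cooked.charAt(1)); // 130 132
  }
}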

View File

@ -55,7 +55,7 @@ abstract class StemmerTestBase extends LuceneTestCase {
}
try {
Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), true);
Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), ignoreCase);
stemmer = new Stemmer(dictionary);
} finally {
IOUtils.closeWhileHandlingException(affixStream);

View File

@ -47,8 +47,8 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi", "dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff",
"albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff",
"amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff",
//BUG! "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
//BUG! "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
"arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
"armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
"azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff",
"belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff",
"belarusian_dictionary-0.1.2-fx+sm+tb.xpi", "dictionaries/be.dic", "dictionaries/be.aff",
@ -72,13 +72,13 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi", "dictionaries/es_AR.dic", "dictionaries/es_AR.aff",
"diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi", "dictionaries/es_MX.dic", "dictionaries/es_MX.aff",
"diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi", "dictionaries/roa-ES-val.dic", "dictionaries/roa-ES-val.aff",
//BUG! "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
"diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic.dic", "dictionaries/fr-classic.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-modern.dic", "dictionaries/fr-modern.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-reform.dic", "dictionaries/fr-reform.aff",
"difazier_an_drouizig-0.12-tb+sm+fx.xpi", "dictionaries/br.dic", "dictionaries/br.aff",
//BUG! "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
"dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
"dizionari_furlan-3.1-tb+fx+sm.xpi", "dictionaries/fur-IT.dic", "dictionaries/fur-IT.aff",
"dizionario_italiano-3.3.2-fx+sm+tb.xpi", "dictionaries/it_IT.dic", "dictionaries/it_IT.aff",
"eesti_keele_speller-3.2-fx+tb+sm.xpi", "dictionaries/et-EE.dic", "dictionaries/et-EE.aff",
@ -101,10 +101,10 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff",
"hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff",
"hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff",
//BUG! "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu_HU.dic", "dictionaries/hu_HU.aff",
//BUG! "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
"hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu.dic", "dictionaries/hu.aff",
//BUG: has no encoding declaration "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
"kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff",
//BUG! "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
"kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
"kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi", "dictionaries/Kaszebsczi.dic", "dictionaries/Kaszebsczi.aff",
"kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi", "dictionaries/sw_TZ.dic", "dictionaries/sw_TZ.aff",
"kurdish_spell_checker-0.96-fx+tb+sm.xpi", "dictionaries/ku-TR.dic", "dictionaries/ku-TR.aff",
@ -113,8 +113,8 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi", "dictionaries/lt.dic", "dictionaries/lt.aff",
"litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi", "dictionaries/ga.dic", "dictionaries/ga.aff",
"litreoir_na_liongailise-0.03-fx+sm+tb.xpi", "dictionaries/ln-CD.dic", "dictionaries/ln-CD.aff",
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
"macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
"macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
"malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/mg_MG.dic", "dictionaries/mg_MG.aff",
"marathi_dictionary-9.3-sm+tb+sb+fx.xpi", "dictionaries/mr-IN.dic", "dictionaries/mr-IN.aff",
"ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nr-ZA.dic", "dictionaries/nr-ZA.aff",
@ -125,8 +125,8 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/or-IN.dic", "dictionaries/or-IN.aff",
"polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi", "dictionaries/pl.dic", "dictionaries/pl.aff",
"punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi", "dictionaries/pa-IN.dic", "dictionaries/pa-IN.aff",
//BUG! "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
//BUG! "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
"romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
"russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
"sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi", "dictionaries/sa_IN.dic", "dictionaries/sa_IN.aff",
"scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi", "dictionaries/gd-GB.dic", "dictionaries/gd-GB.aff",
"serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Cyrl.dic", "dictionaries/sr-RS-Cyrl.aff",
@ -146,22 +146,22 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"telugu_spell_checker-0.3-tb+fx+sm.xpi", "dictionaries/te_IN.dic", "dictionaries/te_IN.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi-x-Tai Tokerau.dic", "dictionaries/mi-x-Tai Tokerau.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi.dic", "dictionaries/mi.aff",
//BUG! "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
//BUG: broken file (hunspell refuses to load, too) "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
"tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff",
"tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff",
"turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
//BUG! "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
"turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
"ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff",
"united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff",
"upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi", "dictionaries/hsb.dic", "dictionaries/hsb.aff",
//BUG! "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
"urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
"uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/uz.dic", "dictionaries/uz.aff",
"valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi", "dictionaries/ca-ES-valencia.dic", "dictionaries/ca-ES-valencia.aff",
"venda_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/ve-ZA.dic", "dictionaries/ve-ZA.aff",
"verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.dic", "dictionaries/vi-DauCu.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff",
//BUG! "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
"woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
"xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff",
"xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff",
"yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff",
@ -196,7 +196,7 @@ public class TestAllDictionaries2 extends LuceneTestCase {
}
public void testOneDictionary() throws Exception {
String toTest = "hausa_spelling_dictionary-0.2-tb+fx.xpi";
String toTest = "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi";
for (int i = 0; i < tests.length; i++) {
if (tests[i].equals(toTest)) {
File f = new File(DICTIONARY_HOME, tests[i]);
@ -210,7 +210,7 @@ public class TestAllDictionaries2 extends LuceneTestCase {
try (InputStream dictionary = zip.getInputStream(dicEntry);
InputStream affix = zip.getInputStream(affEntry)) {
new Dictionary(affix, dictionary);
new Dictionary(affix, dictionary);
}
}
}

View File

@ -17,6 +17,7 @@ package org.apache.lucene.analysis.hunspell;
* limitations under the License.
*/
import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
@ -24,6 +25,7 @@ import java.text.ParseException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.fst.Builder;
@ -77,6 +79,40 @@ public class TestDictionary extends LuceneTestCase {
affixStream.close();
dictStream.close();
}
public void testCompressedBeforeSetDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
Dictionary dictionary = new Dictionary(affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char flags[] = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
affixStream.close();
dictStream.close();
}
public void testCompressedEmptyAliasDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed-empty-alias.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
Dictionary dictionary = new Dictionary(affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char flags[] = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
affixStream.close();
dictStream.close();
}
// malformed rule causes ParseException
public void testInvalidData() throws Exception {
@ -87,7 +123,7 @@ public class TestDictionary extends LuceneTestCase {
new Dictionary(affixStream, dictStream);
fail("didn't get expected exception");
} catch (ParseException expected) {
assertEquals("The affix file contains a rule with less than five elements", expected.getMessage());
assertTrue(expected.getMessage().startsWith("The affix file contains a rule with less than four elements"));
assertEquals(24, expected.getErrorOffset());
}
@ -178,4 +214,16 @@ public class TestDictionary extends LuceneTestCase {
Dictionary.applyMappings(fst, sb);
assertEquals("ghghghde", sb.toString());
}
public void testSetWithCrazyWhitespaceAndBOMs() throws Exception {
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\t UTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\r\n".getBytes(IOUtils.CHARSET_UTF_8))));
}
public void testFlagWithCrazyWhitespace() throws Exception {
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG\tUTF-8"));
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG UTF-8"));
}
}
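
These direct calls are possible because the patch widens getDictionaryEncoding and getFlagParsingStrategy from private to package-private static, as seen in the Dictionary.java hunks above.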

View File

@ -0,0 +1,36 @@
package org.apache.lucene.analysis.hunspell;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.junit.BeforeClass;
public class TestEscaped extends StemmerTestBase {
@BeforeClass
public static void beforeClass() throws Exception {
init("escaped.aff", "escaped.dic");
}
public void testStemming() {
assertStemsTo("works", "work");
assertStemsTo("work", "work");
assertStemsTo("R2/D2", "R2/D2");
assertStemsTo("R2/D2s", "R2/D2");
assertStemsTo("N/A", "N/A");
assertStemsTo("N/As");
}
}

View File

@ -31,6 +31,7 @@ import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -39,9 +40,13 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
// no multiple try-with to workaround bogus VerifyError
InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
try {
dictionary = new Dictionary(affixStream, dictStream);
} finally {
IOUtils.closeWhileHandlingException(affixStream, dictStream);
}
}
@ -97,9 +102,13 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
public void testIgnoreCaseNoSideEffects() throws Exception {
final Dictionary d;
try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
// no multiple try-with to workaround bogus VerifyError
InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
try {
d = new Dictionary(affixStream, Collections.singletonList(dictStream), true);
} finally {
IOUtils.closeWhileHandlingException(affixStream, dictStream);
}
Analyzer a = new Analyzer() {
@Override

View File

@ -0,0 +1,42 @@
package org.apache.lucene.analysis.hunspell;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.junit.BeforeClass;
public class TestOptionalCondition extends StemmerTestBase {
@BeforeClass
public static void beforeClass() throws Exception {
init("optional-condition.aff", "condition.dic");
}
public void testStemming() {
assertStemsTo("hello", "hello");
assertStemsTo("try", "try");
assertStemsTo("tried", "try");
assertStemsTo("work", "work");
assertStemsTo("worked", "work");
assertStemsTo("rework", "work");
assertStemsTo("reworked", "work");
assertStemsTo("retried");
assertStemsTo("workied");
assertStemsTo("tryed");
assertStemsTo("tryied");
assertStemsTo("helloed");
}
}

View File

@ -19,6 +19,6 @@ SFX E 0 d o
PFX B Y 1
PFX B 0 s o
#wrong rule (only 4 elements)
#wrong rule (only 3 elements)
PFX A0 Y 1
PFX A0 0 a
PFX A0 0

View File

@ -0,0 +1,29 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
FLAG long
AF 5
AF AA
AF BB
AF CC
AF DD
AF EE
SFX AA Y 3
SFX AA 0 e n
SFX AA 0 e t
SFX AA 0 e h
SFX CC Y 2
SFX CC 0 d/3 c
SFX CC 0 c b
SFX DD Y 1
SFX DD 0 s o
SFX EE Y 1
SFX EE 0 d o
PFX BB Y 1
PFX BB 0 s o

View File

@ -0,0 +1,30 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
FLAG long
AF 6
AF AA
AF BB
AF CC
AF DD
AF EE
AF
SFX AA Y 3
SFX AA 0 e n
SFX AA 0 e t
SFX AA 0 e h
SFX CC Y 2
SFX CC 0 d/3 c
SFX CC 0 c b
SFX DD Y 1
SFX DD 0 s o
SFX EE Y 1
SFX EE 0 d o
PFX BB Y 1
PFX BB 0 s o

View File

@ -1,8 +1,3 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
FLAG long
AF 5
AF AA
AF BB
@ -10,6 +5,11 @@ AF CC
AF DD
AF EE
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
FLAG long
SFX AA Y 3
SFX AA 0 e n
SFX AA 0 e t

View File

@ -0,0 +1,4 @@
SET UTF-8
SFX A Y 1
SFX A 0 s . +PLUR

View File

@ -0,0 +1,4 @@
3
work/A
R2\/D2/A
N\/A

View File

@ -0,0 +1,14 @@
SET UTF-8
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ
REP 2
REP f ph
REP ph f
# has no condition
PFX A Y 1
PFX A 0 re
SFX B Y 2
SFX B 0 ed [^y]
SFX B y ied y

View File

@ -19,7 +19,7 @@ package org.apache.lucene.search;
import java.util.Collection;
/** Used by {@link BulkScorers} that need to pass a {@link
/** Used by {@link BulkScorer}s that need to pass a {@link
* Scorer} to {@link Collector#setScorer}. */
final class FakeScorer extends Scorer {
float score;

View File

@ -26,12 +26,13 @@ import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Bits.MatchNoBits;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
@ -121,8 +122,11 @@ public class TestReuseDocsEnum extends LuceneTestCase {
public void testReuseDocsEnumDifferentReader() throws IOException {
Directory dir = newDirectory();
Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(cp));
int numdocs = atLeast(20);
createRandomIndex(numdocs, writer, random());
writer.commit();
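
The two analyzer lines above are an idiom repeated across most of the test files that follow. The apparent intent, inferred from the diff rather than stated in it: cap MockAnalyzer's token length at a random value no larger than IndexWriter.MAX_TERM_LENGTH, so that randomly generated or line-file tokens can never exceed the longest term IndexWriter will accept:

MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));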

View File

@ -139,8 +139,11 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
mp.setUseCompoundFile(false);
mp.setNoCFSRatio(1.0);
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
// TODO: remove randomness
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
.setMergePolicy(mp);
conf.setCodec(Codec.forName("Lucene40"));
IndexWriter writer = new IndexWriter(dir, conf);

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
*
@ -41,8 +42,11 @@ public class TestCustomNorms extends LuceneTestCase {
public void testFloatNorms() throws IOException {
Directory dir = newDirectory();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random()));
analyzer);
Similarity provider = new MySimProvider();
config.setSimilarity(provider);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

View File

@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
@ -63,9 +62,12 @@ public class TestDuelingCodecs extends LuceneTestCase {
long seed = random().nextLong();
// must use same seed because of random payloads, etc
Analyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
Analyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
int maxTermLength = TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH);
MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
leftAnalyzer.setMaxTokenLength(maxTermLength);
MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
rightAnalyzer.setMaxTokenLength(maxTermLength);
// but these can be different
// TODO: this turns this into a really big test of Multi*, is that what we want?
IndexWriterConfig leftConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer);

View File

@ -29,6 +29,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -64,8 +65,11 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
Directory dir = newDirectory();
MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setFlushPolicy(flushPolicy);
analyzer).setFlushPolicy(flushPolicy);
final int numDWPT = 1 + atLeast(2);
DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(
numDWPT);

View File

@ -54,7 +54,10 @@ public class TestForceMergeForever extends LuceneTestCase {
public void test() throws Exception {
final Directory d = newDirectory();
final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
// Try to make an index that requires merging:
w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));

View File

@ -51,7 +51,9 @@ public class TestIndexWriterOutOfFileDescriptors extends LuceneTestCase {
System.out.println("TEST: iter=" + iter);
}
try {
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
if (VERBOSE) {
// Do this ourselves instead of relying on LTC so

View File

@ -548,7 +548,10 @@ public class TestIndexWriterWithThreads extends LuceneTestCase {
final int threadCount = TestUtil.nextInt(random(), 2, 6);
final AtomicReference<IndexWriter> writerRef = new AtomicReference<IndexWriter>();
writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)));
final LineFileDocs docs = new LineFileDocs(random());
final Thread[] threads = new Thread[threadCount];
final int iters = atLeast(100);

View File

@ -75,7 +75,10 @@ public class TestNorms extends LuceneTestCase {
// LUCENE-1260
public void testCustomEncoder() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
config.setSimilarity(new CustomNormEncodingSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
Document doc = new Document();

View File

@ -46,7 +46,10 @@ public class TestRollingUpdates extends LuceneTestCase {
Codec.setDefault(TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
}
final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
final int SIZE = atLeast(20);
int id = 0;
IndexReader r = null;

View File

@ -44,7 +44,9 @@ public class TestTermsEnum extends LuceneTestCase {
Random random = new Random(random().nextLong());
final LineFileDocs docs = new LineFileDocs(random, true);
final Directory d = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), d);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final RandomIndexWriter w = new RandomIndexWriter(random(), d, analyzer);
final int numDocs = atLeast(10);
for(int docCount=0;docCount<numDocs;docCount++) {
w.addDocument(docs.nextDoc());

View File

@ -24,8 +24,10 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@ -36,13 +38,14 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
public class TestSameScoresWithThreads extends LuceneTestCase {
public void test() throws Exception {
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
LineFileDocs docs = new LineFileDocs(random());
int charsToIndex = atLeast(100000);
int charsIndexed = 0;

View File

@ -38,14 +38,15 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
public class TestNRTCachingDirectory extends LuceneTestCase {
public void testNRTAndCommit() throws Exception {
Directory dir = newDirectory();
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
final LineFileDocs docs = new LineFileDocs(random(), true);
final int numDocs = TestUtil.nextInt(random(), 100, 400);

View File

@ -292,7 +292,10 @@ public class TestFSTs extends LuceneTestCase {
final LineFileDocs docs = new LineFileDocs(random(), true);
final int RUN_TIME_MSEC = atLeast(500);
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = TestUtil.getTempDir("fstlines");
final Directory dir = newFSDirectory(tempDir);
final IndexWriter writer = new IndexWriter(dir, conf);

View File

@ -128,7 +128,7 @@ public abstract class AbstractAllGroupHeadsCollector<GH extends AbstractAllGroup
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
return false;
}
/**

View File

@ -380,10 +380,7 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
System.out.println("\n===================================================================================");
}
assertEquals(expectedGroupHeads.length, actualGroupHeads.length);
for (int i = 0; i < expectedGroupHeads.length; i++) {
assertEquals(expectedGroupHeads[i], actualGroupHeads[i]);
}
assertArrayEquals(expectedGroupHeads, actualGroupHeads);
}
} finally {
QueryUtils.purgeFieldCache(r);

View File

@ -436,6 +436,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
for (int i = 0; i < numDocs; i++) {
Directory dir = newDirectory();
MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), TEST_VERSION_CURRENT, mockAnalyzer));
Document nextDoc = lineFileDocs.nextDoc();
Document doc = new Document();

View File

@ -222,24 +222,14 @@ public final class TermsFilter extends Filter {
}
TermsFilter test = (TermsFilter) obj;
if (test.hashCode == hashCode && this.termsAndFields.length == test.termsAndFields.length) {
// first check the fields before even comparing the bytes
for (int i = 0; i < termsAndFields.length; i++) {
TermsAndField current = termsAndFields[i];
if (!current.equals(test.termsAndFields[i])) {
return false;
}
// first check the fields before even comparing the bytes
if (test.hashCode == hashCode && Arrays.equals(termsAndFields, test.termsAndFields)) {
int lastOffset = termsAndFields[termsAndFields.length - 1].end;
// compare offsets since we sort they must be identical
if (ArrayUtil.equals(offsets, 0, test.offsets, 0, lastOffset + 1)) {
// straight byte comparison since we sort they must be identical
return ArrayUtil.equals(termsBytes, 0, test.termsBytes, 0, offsets[lastOffset]);
}
// straight byte comparison since we sort they must be identical
int end = offsets[termsAndFields.length];
byte[] left = this.termsBytes;
byte[] right = test.termsBytes;
for(int i=0;i < end;i++) {
if (left[i] != right[i]) {
return false;
}
}
return true;
}
return false;
}
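
This is the LUCENE-5502 fix noted in CHANGES.txt above: the rewritten equals compares the termsAndFields array, then the offsets array (the term boundaries, which the old version never checked), and only then the raw term bytes.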

View File

@ -17,19 +17,29 @@ package org.apache.lucene.queries;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@ -45,19 +55,13 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
public class CommonTermsQueryTest extends LuceneTestCase {
public void testBasics() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@ -186,7 +190,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {
public void testMinShouldMatch() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@ -344,7 +350,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {
@Test
public void testExtend() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@ -397,7 +405,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {
public void testRandomIndex() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
createRandomIndex(atLeast(50), w, random().nextLong());
DirectoryReader reader = w.getReader();
AtomicReader wrapper = SlowCompositeReaderWrapper.wrap(reader);

View File

@ -50,7 +50,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
public class TermsFilterTest extends LuceneTestCase {
@ -297,7 +296,15 @@ public class TermsFilterTest extends LuceneTestCase {
}
}
}
public void testSingleFieldEquals() {
// Two terms with the same hash code
assertEquals("AaAaBB".hashCode(), "BBBBBB".hashCode());
TermsFilter left = termsFilter(true, new Term("id", "AaAaAa"), new Term("id", "AaAaBB"));
TermsFilter right = termsFilter(true, new Term("id", "AaAaAa"), new Term("id", "BBBBBB"));
assertFalse(left.equals(right));
}
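
The collision asserted above is a standard property of Java's polynomial string hash (h = 31*h + c): the two-char blocks "Aa" and "BB" both hash to 2112, so equal-length strings assembled from those blocks always collide. A quick sketch:

public class HashCollisionDemo {
  public static void main(String[] args) {
    // "Aa": 31*'A' + 'a' = 31*65 + 97 = 2112; "BB": 31*66 + 66 = 2112
    System.out.println("Aa".hashCode() == "BB".hashCode());         // true
    System.out.println("AaAaBB".hashCode() == "BBBBBB".hashCode()); // true
  }
}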
public void testNoTerms() {
List<Term> emptyTerms = Collections.emptyList();
List<BytesRef> emptyBytesRef = Collections.emptyList();

View File

@ -1379,7 +1379,9 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
// during flush/merge
public void testInvertedWrite() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
// Must be concurrent because thread(s) can be merging
// while up to one thread flushes, and each of those

View File

@ -0,0 +1,110 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Random;
import java.util.WeakHashMap;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.util.VirtualMethod;
/** A crazy {@link BulkScorer} that wraps a {@link Scorer}
* but shuffles the order of the collected documents. */
public class AssertingBulkOutOfOrderScorer extends BulkScorer {
final Random random;
final Scorer scorer;
public AssertingBulkOutOfOrderScorer(Random random, Scorer scorer) {
this.random = random;
this.scorer = scorer;
}
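// Fisher-Yates shuffle, applied in lock-step to the parallel doc/score/freq
// buffers so each buffered hit keeps its own score and freq after reordering.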
private void shuffle(int[] docIDs, float[] scores, int[] freqs, int size) {
for (int i = size - 1; i > 0; --i) {
final int other = random.nextInt(i + 1);
final int tmpDoc = docIDs[i];
docIDs[i] = docIDs[other];
docIDs[other] = tmpDoc;
final float tmpScore = scores[i];
scores[i] = scores[other];
scores[other] = tmpScore;
final int tmpFreq = freqs[i];
freqs[i] = freqs[other];
freqs[other] = tmpFreq;
}
}
private static void flush(int[] docIDs, float[] scores, int[] freqs, int size,
FakeScorer scorer, Collector collector) throws IOException {
for (int i = 0; i < size; ++i) {
scorer.doc = docIDs[i];
scorer.freq = freqs[i];
scorer.score = scores[i];
collector.collect(scorer.doc);
}
}
@Override
public boolean score(Collector collector, int max) throws IOException {
if (scorer.docID() == -1) {
scorer.nextDoc();
}
FakeScorer fake = new FakeScorer();
collector.setScorer(fake);
final int bufferSize = 1 + random.nextInt(100);
final int[] docIDs = new int[bufferSize];
final float[] scores = new float[bufferSize];
final int[] freqs = new int[bufferSize];
int buffered = 0;
int doc = scorer.docID();
while (doc < max) {
docIDs[buffered] = doc;
scores[buffered] = scorer.score();
freqs[buffered] = scorer.freq();
if (++buffered == bufferSize) {
shuffle(docIDs, scores, freqs, buffered);
flush(docIDs, scores, freqs, buffered, fake, collector);
buffered = 0;
}
doc = scorer.nextDoc();
}
shuffle(docIDs, scores, freqs, buffered);
flush(docIDs, scores, freqs, buffered, fake, collector);
return doc != Scorer.NO_MORE_DOCS;
}
@Override
public String toString() {
return "AssertingBulkOutOfOrderScorer(" + scorer + ")";
}
}

View File

@ -34,18 +34,11 @@ public class AssertingBulkScorer extends BulkScorer {
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR = new VirtualMethod<BulkScorer>(BulkScorer.class, "score", Collector.class);
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR_RANGE = new VirtualMethod<BulkScorer>(BulkScorer.class, "score", Collector.class, int.class);
// we need to track scorers using a weak hash map because otherwise we
// could lose references because of eg.
// AssertingScorer.score(Collector) which needs to delegate to work correctly
private static Map<BulkScorer, WeakReference<AssertingBulkScorer>> ASSERTING_INSTANCES = Collections.synchronizedMap(new WeakHashMap<BulkScorer, WeakReference<AssertingBulkScorer>>());
public static BulkScorer wrap(Random random, BulkScorer other) {
if (other == null || other instanceof AssertingBulkScorer) {
return other;
}
final AssertingBulkScorer assertScorer = new AssertingBulkScorer(random, other);
ASSERTING_INSTANCES.put(other, new WeakReference<AssertingBulkScorer>(assertScorer));
return assertScorer;
return new AssertingBulkScorer(random, other);
}
public static boolean shouldWrap(BulkScorer inScorer) {
@ -87,4 +80,5 @@ public class AssertingBulkScorer extends BulkScorer {
public String toString() {
return "AssertingBulkScorer(" + in + ")";
}
}

View File

@ -29,12 +29,14 @@ class AssertingWeight extends Weight {
return other instanceof AssertingWeight ? other : new AssertingWeight(random, other);
}
final boolean scoresDocsOutOfOrder;
final Random random;
final Weight in;
AssertingWeight(Random random, Weight in) {
this.random = random;
this.in = in;
scoresDocsOutOfOrder = in.scoresDocsOutOfOrder() || random.nextBoolean();
}
@Override
@ -73,8 +75,21 @@ class AssertingWeight extends Weight {
if (inScorer == null) {
return null;
}
if (AssertingBulkScorer.shouldWrap(inScorer)) {
// The incoming scorer already has a specialized
// implementation for BulkScorer, so we should use it:
return AssertingBulkScorer.wrap(new Random(random.nextLong()), inScorer);
} else if (scoreDocsInOrder == false && random.nextBoolean()) {
// The caller claims it can handle out-of-order
// docs; let's confirm that by pulling docs and
// randomly shuffling them before collection:
//Scorer scorer = in.scorer(context, acceptDocs);
Scorer scorer = scorer(context, acceptDocs);
// Scorer should not be null if bulkScorer wasn't:
assert scorer != null;
return new AssertingBulkOutOfOrderScorer(new Random(random.nextLong()), scorer);
} else {
// Let super wrap this.scorer instead, so we use
// AssertingScorer:
@ -84,8 +99,7 @@ class AssertingWeight extends Weight {
@Override
public boolean scoresDocsOutOfOrder() {
return in.scoresDocsOutOfOrder();
return scoresDocsOutOfOrder;
}
}

View File

@ -329,7 +329,7 @@ public class QueryUtils {
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
return false;
}
});

View File

@ -449,7 +449,9 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
myNodeID = nodeID;
dir = newFSDirectory(TestUtil.getTempDir("ShardSearchingTestBase"));
// TODO: set warmer
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
if (VERBOSE) {
iwc.setInfoStream(new PrintStreamInfoStream(System.out));

View File

@ -138,6 +138,12 @@ Bug Fixes
* SOLR-5818: Distributed search with a custom comparator does not work correctly.
(Ryan Ernst)
* SOLR-5834: Overseer threads are only being interrupted and not closed.
(hossman, Mark Miller)
* SOLR-5839: ZookeeperInfoServlet does not trim path properly.
(Furkan KAMACI via Mark Miller)
Optimizations
----------------------
* SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY
@ -181,6 +187,13 @@ Other Changes
* SOLR-5796: Make configurable how long we are willing to wait for a core to see
the ZK-advertised leader in its local state.
(Timothy Potter via Mark Miller)
* SOLR-5825: Separate HTTP request creation from execution in SolrJ.
(Steven Bower via Erick Erickson)
* SOLR-5837: Add hashCode/equals to SolrDocument, SolrInputDocument
and SolrInputField for testing purposes. (Varun Thacker, Noble Paul,
Mark Miller)
================== 4.7.0 ==================

View File

@ -81,8 +81,8 @@ public class Overseer {
// Internal queue where the Overseer stores events that have not yet been published into cluster state.
// If the Overseer dies while extracting the main queue, a new Overseer will start from this queue.
private final DistributedQueue workQueue;
private volatile boolean isClosed;
private Map clusterProps;
private boolean isClosed = false;
public ClusterStateUpdater(final ZkStateReader reader, final String myId) {
this.zkClient = reader.getZkClient();
@ -1030,20 +1030,22 @@ public class Overseer {
class OverseerThread extends Thread implements ClosableThread {
private volatile boolean isClosed;
protected volatile boolean isClosed;
private ClosableThread thread;
public OverseerThread(ThreadGroup tg,
ClusterStateUpdater clusterStateUpdater) {
super(tg, clusterStateUpdater);
public OverseerThread(ThreadGroup tg, ClosableThread thread) {
super(tg, (Runnable) thread);
this.thread = thread;
}
public OverseerThread(ThreadGroup ccTg,
OverseerCollectionProcessor overseerCollectionProcessor, String string) {
super(ccTg, overseerCollectionProcessor, string);
public OverseerThread(ThreadGroup ccTg, ClosableThread thread, String name) {
super(ccTg, (Runnable) thread, name);
this.thread = thread;
}
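// SOLR-5834: close() must delegate to the wrapped ClosableThread so its run loop
// actually terminates, rather than the thread merely being interrupted.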
@Override
public void close() {
thread.close();
this.isClosed = true;
}
@ -1084,8 +1086,7 @@ public class Overseer {
ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process.");
ocp = new OverseerCollectionProcessor(reader, id, shardHandler, adminPath);
ccThread = new OverseerThread(ccTg, ocp,
"Overseer-" + id);
ccThread = new OverseerThread(ccTg, ocp, "Overseer-" + id);
ccThread.setDaemon(true);
updaterThread.start();

View File

@ -195,7 +195,7 @@ public final class ZookeeperInfoServlet extends HttpServlet {
if (path == null) {
path = "/";
} else {
path.trim();
path = path.trim();
if (path.length() == 0) {
path = "/";
}

View File

@ -23,7 +23,9 @@ import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
@ -394,4 +396,106 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
assertNull(h.validateUpdate(add(xml, new String[0])));
}
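// SOLR-5837: sanity tests for the equals/hashCode implementations added to
// SolrDocument, SolrInputDocument and SolrInputField.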
public void testSolrDocumentEquals() {
String randomString = TestUtil.randomSimpleString(random());
SolrDocument doc1 = new SolrDocument();
doc1.addField("foo", randomString);
SolrDocument doc2 = new SolrDocument();
doc2.addField("foo", randomString);
assertTrue(doc1.equals(doc2));
doc1.addField("foo", "bar");
assertFalse(doc1.equals(doc2));
doc1 = new SolrDocument();
doc1.addField("bar", randomString);
assertFalse(doc1.equals(doc2));
int randomInt = random().nextInt();
doc1 = new SolrDocument();
doc1.addField("foo", randomInt);
doc2 = new SolrDocument();
doc2.addField("foo", randomInt);
assertTrue(doc1.equals(doc2));
doc2 = new SolrDocument();
doc2.addField("bar", randomInt);
assertFalse(doc1.equals(doc2));
}
public void testSolrInputDocumentEquality() {
String randomString = TestUtil.randomSimpleString(random());
SolrInputDocument doc1 = new SolrInputDocument();
doc1.addField("foo", randomString);
SolrInputDocument doc2 = new SolrInputDocument();
doc2.addField("foo", randomString);
assertTrue(doc1.equals(doc2));
doc1.setDocumentBoost(1.1f);
assertFalse(doc1.equals(doc2));
doc2.setDocumentBoost(1.1f);
assertTrue(doc1.equals(doc2));
doc2.setDocumentBoost(20f);
assertFalse(doc1.equals(doc2));
doc1 = new SolrInputDocument();
doc1.addField("foo", randomString);
doc2 = new SolrInputDocument();
doc2.addField("foo", randomString);
SolrInputDocument childDoc = new SolrInputDocument();
childDoc.addField("foo", "bar");
doc1.addChildDocument(childDoc);
assertFalse(doc1.equals(doc2));
doc2.addChildDocument(childDoc);
assertTrue(doc1.equals(doc2));
SolrInputDocument childDoc1 = new SolrInputDocument();
childDoc1.addField(TestUtil.randomSimpleString(random()), TestUtil.randomSimpleString(random()));
doc2.addChildDocument(childDoc1);
assertFalse(doc1.equals(doc2));
}
public void testSolrInputFieldEquality() {
String randomString = TestUtil.randomSimpleString(random(), 10, 20);
int val = random().nextInt();
SolrInputField sif1 = new SolrInputField(randomString);
sif1.setValue(val, 1.0f);
SolrInputField sif2 = new SolrInputField(randomString);
sif2.setValue(val, 1.0f);
assertTrue(sif1.equals(sif2));
sif1.setBoost(2.1f);
sif2.setBoost(2.1f);
assertTrue(sif1.equals(sif2));
sif2.setBoost(2.0f);
assertFalse(sif1.equals(sif2));
sif2.setName("foo");
assertFalse(sif1.equals(sif2));
}
}

View File

@ -199,8 +199,11 @@ public class HttpSolrServer extends SolrServer {
return request(request, responseParser);
}
public NamedList<Object> request(final SolrRequest request,
final ResponseParser processor) throws SolrServerException, IOException {
public NamedList<Object> request(final SolrRequest request, final ResponseParser processor) throws SolrServerException, IOException {
return executeMethod(createMethod(request), processor);
}
protected HttpRequestBase createMethod(final SolrRequest request) throws IOException, SolrServerException {
HttpRequestBase method = null;
InputStream is = null;
SolrParams params = request.getParams();
@ -382,6 +385,10 @@ public class HttpSolrServer extends SolrServer {
throw new SolrServerException("error reading streams", ex);
}
return method;
}
protected NamedList<Object> executeMethod(HttpRequestBase method, final ResponseParser processor) throws SolrServerException {
// XXX client already has this set, is this needed?
method.getParams().setParameter(ClientPNames.HANDLE_REDIRECTS,
followRedirects);
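A minimal sketch (not from this commit) of what the createMethod/executeMethod split enables: a subclass can build the request, mutate it, and only then execute it. The subclass name and the X-Trace-Id header are illustrative assumptions:

  public class TracingSolrServer extends HttpSolrServer {
    public TracingSolrServer(String baseURL) {
      super(baseURL);
    }
    @Override
    public NamedList<Object> request(final SolrRequest request, final ResponseParser processor)
        throws SolrServerException, IOException {
      HttpRequestBase method = createMethod(request); // build, but do not send yet
      method.addHeader("X-Trace-Id", java.util.UUID.randomUUID().toString()); // hypothetical header
      return executeMethod(method, processor); // now execute
    }
  }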

View File

@ -213,7 +213,41 @@ public class SolrDocument implements Map<String,Object>, Iterable<Map.Entry<Stri
public Iterator<Entry<String, Object>> iterator() {
return _fields.entrySet().iterator();
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof SolrDocument)) {
return false;
}
SolrDocument solrDocument = (SolrDocument) o;
if (!_fields.equals(solrDocument._fields)) {
return false;
}
return true;
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public int hashCode() {
return _fields.hashCode();
}
//-----------------------------------------------------------------------------------------
// JSTL Helpers
//-----------------------------------------------------------------------------------------

View File

@ -275,7 +275,51 @@ public class SolrInputDocument implements Map<String,SolrInputField>, Iterable<S
public Collection<SolrInputField> values() {
return _fields.values();
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof SolrInputDocument)) {
return false;
}
SolrInputDocument sdoc = (SolrInputDocument) o;
if (!_fields.equals(sdoc._fields)) {
return false;
}
if (Float.compare(sdoc._documentBoost, _documentBoost) != 0) {
return false;
}
if (_childDocuments != null ? !_childDocuments.equals(sdoc._childDocuments) : sdoc._childDocuments != null) {
return false;
}
return true;
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public int hashCode() {
int result = _fields.hashCode();
result = 31 * result + (_documentBoost != +0.0f ? Float.floatToIntBits(_documentBoost) : 0);
result = 31 * result + (_childDocuments != null ? _childDocuments.hashCode() : 0);
return result;
}
public void addChildDocument(SolrInputDocument child) {
if (_childDocuments == null) {
_childDocuments = new ArrayList<SolrInputDocument>();

View File

@ -229,4 +229,50 @@ public class SolrInputField implements Iterable<Object>, Serializable
}
return clone;
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof SolrInputField)) {
return false;
}
SolrInputField sif = (SolrInputField) o;
if (!name.equals(sif.name)) {
return false;
}
if (!value.equals(sif.value)) {
return false;
}
if (Float.compare(sif.boost, boost) != 0) {
return false;
}
return true;
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public int hashCode() {
int result = name.hashCode();
result = 31 * result + value.hashCode();
result = 31 * result + (boost != +0.0f ? Float.floatToIntBits(boost) : 0);
return result;
}
}

View File

@ -521,7 +521,7 @@ public class JavaBinCodec {
public Object getValue() {
return value;
}
@Override
public String toString() {
return "MapEntry[" + key.toString() + ":" + value.toString() + "]";
@ -530,7 +530,28 @@ public class JavaBinCodec {
@Override
public Object setValue(Object value) {
throw new UnsupportedOperationException();
}};
}
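// Note: accumulates as result *= (31 + componentHash), which is unconventional
// but still guarantees that equal entries hash equally, as the contract requires.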
@Override
public int hashCode() {
int result = 31;
result *= 31 + getKey().hashCode();
result *= 31 + getValue().hashCode();
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (!(obj instanceof Entry)) {
return false;
}
Map.Entry<Object, Object> entry = (Entry<Object, Object>) obj;
return (this.getKey().equals(entry.getKey()) && this.getValue().equals(entry.getValue()));
}
};
}
/**

Binary file not shown.

View File

@ -17,17 +17,37 @@ package org.apache.solr.common.util;
* limitations under the License.
*/
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.common.EnumFieldValue;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.junit.Test;
public class TestJavaBinCodec extends LuceneTestCase {
private static final String SOLRJ_JAVABIN_BACKCOMPAT_BIN = "/solrj/javabin_backcompat.bin";
private final String BIN_FILE_LOCATION = "./solr/solrj/src/test-files/solrj/javabin_backcompat.bin";
public void testStrings() throws Exception {
JavaBinCodec javabin = new JavaBinCodec();
for (int i = 0; i < 10000*RANDOM_MULTIPLIER; i++) {
for (int i = 0; i < 10000 * RANDOM_MULTIPLIER; i++) {
String s = TestUtil.randomUnicodeString(random());
ByteArrayOutputStream os = new ByteArrayOutputStream();
javabin.marshal(s, os);
@ -36,4 +56,158 @@ public class TestJavaBinCodec extends LuceneTestCase {
assertEquals(s, o);
}
}
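// Builds one instance of (nearly) every type JavaBinCodec can serialize; serves
// as the corpus for the back-compat and forward-compat tests below.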
private List<Object> generateAllDataTypes() {
List<Object> types = new ArrayList<>();
types.add(null); //NULL
types.add(true);
types.add(false);
types.add((byte) 1);
types.add((short) 2);
types.add((double) 3);
types.add(-4);
types.add(4);
types.add(42);
types.add((long) -5);
types.add((long) 5);
types.add((long) 50);
types.add((float) 6);
types.add(new Date(0));
Map<Integer, Integer> map = new HashMap<>();
map.put(1, 2);
types.add(map);
SolrDocument doc = new SolrDocument();
doc.addField("foo", "bar");
types.add(doc);
SolrDocumentList solrDocs = new SolrDocumentList();
solrDocs.setMaxScore(1.0f);
solrDocs.setNumFound(1);
solrDocs.setStart(0);
solrDocs.add(0, doc);
types.add(solrDocs);
types.add(new byte[] {1,2,3,4,5});
// TODO?
// List<String> list = new ArrayList<String>();
// list.add("one");
// types.add(list.iterator());
types.add((byte) 15); //END
SolrInputDocument idoc = new SolrInputDocument();
idoc.addField("foo", "bar");
types.add(idoc);
SolrInputDocument parentDoc = new SolrInputDocument();
parentDoc.addField("foo", "bar");
SolrInputDocument childDoc = new SolrInputDocument();
childDoc.addField("foo", "bar");
parentDoc.addChildDocument(childDoc);
types.add(parentDoc);
types.add(new EnumFieldValue(1, "foo"));
types.add(map.entrySet().iterator().next()); //Map.Entry
types.add((byte) (1 << 5)); //TAG_AND_LEN
types.add("foo");
types.add(1);
types.add((long) 2);
SimpleOrderedMap simpleOrderedMap = new SimpleOrderedMap();
simpleOrderedMap.add("bar", "barbar");
types.add(simpleOrderedMap);
NamedList<String> nl = new NamedList<>();
nl.add("foo", "barbar");
types.add(nl);
return types;
}
@Test
public void testBackCompat() {
JavaBinCodec javabin = new JavaBinCodec(){
@Override
public List<Object> readIterator(DataInputInputStream fis) throws IOException {
return super.readIterator(fis);
}
};
try {
InputStream is = getClass().getResourceAsStream(SOLRJ_JAVABIN_BACKCOMPAT_BIN);
List<Object> unmarshaledObj = (List<Object>) javabin.unmarshal(is);
List<Object> matchObj = generateAllDataTypes();
assertEquals(unmarshaledObj.size(), matchObj.size());
for (int i = 0; i < unmarshaledObj.size(); i++) {
if (unmarshaledObj.get(i) instanceof byte[] && matchObj.get(i) instanceof byte[]) {
byte[] b1 = (byte[]) unmarshaledObj.get(i);
byte[] b2 = (byte[]) matchObj.get(i);
assertTrue(Arrays.equals(b1, b2));
} else {
assertEquals(unmarshaledObj.get(i), matchObj.get(i));
}
}
} catch (IOException e) {
fail(e.getMessage());
}
}
@Test
public void testForwardCompat() {
JavaBinCodec javabin = new JavaBinCodec();
ByteArrayOutputStream os = new ByteArrayOutputStream();
Object data = generateAllDataTypes();
try {
javabin.marshal(data, os);
byte[] newFormatBytes = os.toByteArray();
InputStream is = getClass().getResourceAsStream(SOLRJ_JAVABIN_BACKCOMPAT_BIN);
byte[] currentFormatBytes = IOUtils.toByteArray(is);
for (int i = 1; i < currentFormatBytes.length; i++) { // ignore the first byte; it is version information
assertEquals(currentFormatBytes[i], newFormatBytes[i]);
}
} catch (IOException e) {
e.printStackTrace();
fail(e.getMessage());
}
}
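// Regenerates the javabin_backcompat.bin reference file; run manually via main()
// below whenever the serialization format changes.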
public void genBinaryFile() throws IOException {
JavaBinCodec javabin = new JavaBinCodec();
ByteArrayOutputStream os = new ByteArrayOutputStream();
Object data = generateAllDataTypes();
javabin.marshal(data, os);
byte[] out = os.toByteArray();
FileOutputStream fs = new FileOutputStream(new File(BIN_FILE_LOCATION));
BufferedOutputStream bos = new BufferedOutputStream(fs);
bos.write(out);
bos.close();
}
public static void main(String[] args) throws IOException {
TestJavaBinCodec test = new TestJavaBinCodec();
test.genBinaryFile();
}
}