diff --git a/build.xml b/build.xml
index 78f5ba4848e..5c6610d388b 100644
--- a/build.xml
+++ b/build.xml
@@ -18,6 +18,12 @@
 -->
+
+
+
+
+
+
@@ -268,10 +274,6 @@
-
-
-
-
diff --git a/extra-targets.xml b/extra-targets.xml
index 2f3540a16c9..352c055346b 100644
--- a/extra-targets.xml
+++ b/extra-targets.xml
@@ -92,7 +92,7 @@
-
+
@@ -107,8 +107,6 @@
     import org.tmatesoft.svn.core.wc.*;
     import org.apache.tools.ant.Project;
 
-    def RECOMMENDED_SVNKIT_18 = '1.8.2';
-
     SVNClientManager manager = SVNClientManager.newInstance();
     SVNStatusClient statusClient = manager.getStatusClient();
     SVNWCClient wcClient = manager.getWCClient();
@@ -124,11 +122,7 @@
       def ec = ex.getErrorMessage().getErrorCode();
       int code = ec.getCode();
       int category = ec.getCategory();
-      if (code == SVNErrorCode.WC_UNSUPPORTED_FORMAT.getCode()) {
-        task.log('WARNING: Unsupported SVN working copy version! Disabling checks...', Project.MSG_WARN);
-        task.log('If your working copy is on version 1.8 already, please pass -Dsvnkit.version=' + RECOMMENDED_SVNKIT_18 + ' to successfully run checks.', Project.MSG_INFO);
-        return;
-      } else if (code == SVNErrorCode.WC_NOT_DIRECTORY.getCode() || code == SVNErrorCode.WC_NOT_FILE.getCode()) {
+      if (code == SVNErrorCode.WC_NOT_DIRECTORY.getCode() || code == SVNErrorCode.WC_NOT_FILE.getCode()) {
         task.log('WARNING: Development directory is not an SVN checkout! Disabling checks...', Project.MSG_WARN);
         return;
       } else if (category == SVNErrorCode.WC_CATEGORY) {
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7e2fc66352e..783f87ce747 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -147,15 +147,33 @@ Bug fixes
   recursive affix application are driven correctly by continuation classes
   in the affix file. (Robert Muir)
 
+* LUCENE-5497: HunspellStemFilter properly handles escaped terms and affixes without conditions.
+  (Robert Muir)
+
+* LUCENE-5505: HunspellStemFilter ignores BOM markers in dictionaries and handles varying
+  types of whitespace in SET/FLAG commands. (Robert Muir)
+
+* LUCENE-5507: Fix HunspellStemFilter loading of dictionaries with large amounts of aliases
+  etc before the encoding declaration. (Robert Muir)
+
+* LUCENE-5502: Fixed TermsFilter.equals that could return true for different
+  filters. (Igor Motov via Adrien Grand)
+
 Test Framework
 
 * LUCENE-5449: Rename _TestUtil and _TestHelper to remove the leading _.
 
+* LUCENE-5501: Added random out-of-order collection testing (when the collector
+  supports it) to AssertingIndexSearcher. (Adrien Grand)
+
 Build
 
 * LUCENE-5463: RamUsageEstimator.(human)sizeOf(Object) is now a forbidden API.
   (Adrien Grand, Robert Muir)
 
+* LUCENE-5511: "ant precommit" / "ant check-svn-working-copy" now work again
+  with any working copy format (thanks to svnkit 1.8.4). (Uwe Schindler)
+
 ======================= Lucene 4.7.0 =======================
 
 New Features
@@ -188,7 +206,7 @@ New Features
   AnalyzingInfixSuggester but boosts suggestions that matched tokens with
   lower positions. (Remi Melisson via Mike McCandless)
 
-* LUCENE-4399: When sorting by String (SortField.STRING), you can now
+* LUCENE-5399: When sorting by String (SortField.STRING), you can now
   specify whether missing values should be sorted first (the default),
   using SortField.setMissingValue(SortField.STRING_FIRST), or last, using
   SortField.setMissingValue(SortField.STRING_LAST). (Rob Muir,
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
index 974d24185db..0c8ad44cf16 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Dictionary.java
@@ -35,12 +35,16 @@ import org.apache.lucene.util.fst.Outputs;
 import org.apache.lucene.util.fst.Util;
 
 import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
 import java.io.BufferedReader;
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.LineNumberReader;
+import java.io.OutputStream;
 import java.nio.charset.Charset;
 import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
@@ -54,6 +58,7 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 /**
@@ -154,21 +159,41 @@ public class Dictionary {
     this.ignoreCase = ignoreCase;
     this.needsInputCleaning = ignoreCase;
     this.needsOutputCleaning = false; // set if we have an OCONV
-    // TODO: we really need to probably buffer this on disk since so many newer dictionaries
-    // (en_GB, hu_HU, etc) now have tons of AM lines (morph metadata) etc before they finally declare
-    // their encoding... but for now this large buffer is a workaround
-    BufferedInputStream buffered = new BufferedInputStream(affix, 65536);
-    buffered.mark(65536);
-    String encoding = getDictionaryEncoding(buffered);
-    buffered.reset();
-    CharsetDecoder decoder = getJavaEncoding(encoding);
-    readAffixFile(buffered, decoder);
     flagLookup.add(new BytesRef()); // no flags -> ord 0
     stripLookup.add(new BytesRef()); // no strip -> ord 0
-    IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
-    Builder<IntsRef> b = new Builder<>(FST.INPUT_TYPE.BYTE4, o);
-    readDictionaryFiles(dictionaries, decoder, b);
-    words = b.finish();
+
+    File aff = File.createTempFile("affix", "aff", tempDir);
+    OutputStream out = new BufferedOutputStream(new FileOutputStream(aff));
+    InputStream aff1 = null;
+    InputStream aff2 = null;
+    try {
+      // copy contents of affix stream to temp file
+      final byte [] buffer = new byte [1024 * 8];
+      int len;
+      while ((len = affix.read(buffer)) > 0) {
+        out.write(buffer, 0, len);
+      }
+      out.close();
+
+      // pass 1: get encoding
+      aff1 = new BufferedInputStream(new FileInputStream(aff));
+      String encoding = getDictionaryEncoding(aff1);
+
+      // pass 2: parse affixes
+      CharsetDecoder decoder = getJavaEncoding(encoding);
+      aff2 = new BufferedInputStream(new FileInputStream(aff));
+      readAffixFile(aff2, decoder);
+
+      // read dictionary entries
+      IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
+      Builder<IntsRef> b = new Builder<>(FST.INPUT_TYPE.BYTE4, o);
+      readDictionaryFiles(dictionaries, decoder, b);
+      words = b.finish();
+      aliases = null; // no longer needed
+    } finally {
+      IOUtils.closeWhileHandlingException(out, aff1, aff2);
+      aff.delete();
+    }
   }
 
   /**
@@ -251,6 +276,10 @@ public class Dictionary {
     LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
     String line = null;
     while ((line = reader.readLine()) != null) {
+      // ignore any BOM marker on first line
+      if (reader.getLineNumber() == 1 && line.startsWith("\uFEFF")) {
+        line = line.substring(1);
+      }
       if (line.startsWith(ALIAS_KEY)) {
         parseAlias(line);
       } else if (line.startsWith(PREFIX_KEY)) {
@@ -348,8 +377,10 @@ public class Dictionary {
     String line = reader.readLine();
     String ruleArgs[] = line.split("\\s+");
 
-    if (ruleArgs.length < 5) {
-      throw new ParseException("The affix file contains a rule with less than five elements", reader.getLineNumber());
+    // from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
+    // condition is optional
+    if (ruleArgs.length < 4) {
+      throw new ParseException("The affix file contains a rule with less than four elements: " + line, reader.getLineNumber());
     }
 
     char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
@@ -370,7 +401,7 @@ public class Dictionary {
       Arrays.sort(appendFlags);
     }
 
-    String condition = ruleArgs[4];
+    String condition = ruleArgs.length > 4 ? ruleArgs[4] : "."; // at least the gascon affix file has this issue
     if (condition.startsWith("[") && !condition.endsWith("]")) {
       condition = condition + "]";
     }
@@ -464,6 +495,9 @@ public class Dictionary {
     return builder.finish();
   }
+
+  /** pattern accepts optional BOM + SET + any whitespace */
+  final static Pattern ENCODING_PATTERN = Pattern.compile("^(\u00EF\u00BB\u00BF)?SET\\s+");
 
   /**
    * Parses the encoding specified in the affix file readable through the provided InputStream
@@ -473,7 +507,7 @@ public class Dictionary {
    * @throws IOException Can be thrown while reading from the InputStream
    * @throws ParseException Thrown if the first non-empty non-comment line read from the file does not adhere to the format {@code SET <encoding>}
    */
-  private String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
+  static String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
     final StringBuilder encoding = new StringBuilder();
     for (;;) {
       encoding.setLength(0);
@@ -496,9 +530,10 @@ public class Dictionary {
         }
         continue;
       }
-      if (encoding.length() > 4 && "SET ".equals(encoding.substring(0, 4))) {
-        // cleanup the encoding string, too (whitespace)
-        return encoding.substring(4).trim();
+      Matcher matcher = ENCODING_PATTERN.matcher(encoding);
+      if (matcher.find()) {
+        int last = matcher.end();
+        return encoding.substring(last).trim();
       }
     }
   }
@@ -536,8 +571,12 @@ public class Dictionary {
    * @param flagLine Line containing the flag information
    * @return FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition
    */
-  private FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
-    String flagType = flagLine.substring(5);
+  static FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
+    String parts[] = flagLine.split("\\s+");
+    if (parts.length != 2) {
+      throw new IllegalArgumentException("Illegal FLAG specification: " + flagLine);
+    }
+    String flagType = parts[1];
 
     if (NUM_FLAG_TYPE.equals(flagType)) {
       return new NumFlagParsingStrategy();
@@ -550,6 +589,24 @@ public class Dictionary {
     throw new IllegalArgumentException("Unknown flag type: " + flagType);
   }
 
+  final char FLAG_SEPARATOR = 0x1f; // flag separator after escaping
+
+  String unescapeEntry(String entry) {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < entry.length(); i++) {
+      char ch = entry.charAt(i);
+      if (ch == '\\' && i+1 < entry.length()) {
+        sb.append(entry.charAt(i+1));
+        i++;
+      } else if (ch == '/') {
+        sb.append(FLAG_SEPARATOR);
+      } else {
+        sb.append(ch);
+      }
+    }
+    return sb.toString();
+  }
+
   /**
    * Reads the dictionary file through the provided InputStreams, building up the words map
    *
@@ -570,8 +627,9 @@ public class Dictionary {
     String line = lines.readLine(); // first line is number of entries (approximately, sometimes)
     while ((line = lines.readLine()) != null) {
+      line = unescapeEntry(line);
       if (needsInputCleaning) {
-        int flagSep = line.lastIndexOf('/');
+        int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
         if (flagSep == -1) {
           CharSequence cleansed = cleanInput(line, sb);
           writer.write(cleansed.toString().getBytes(IOUtils.CHARSET_UTF_8));
@@ -604,7 +662,7 @@ public class Dictionary {
         scratch1.length = o1.length;
 
         for (int i = scratch1.length - 1; i >= 0; i--) {
-          if (scratch1.bytes[scratch1.offset + i] == '/') {
+          if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR) {
             scratch1.length = i;
             break;
           }
@@ -615,7 +673,7 @@ public class Dictionary {
         scratch2.length = o2.length;
 
         for (int i = scratch2.length - 1; i >= 0; i--) {
-          if (scratch2.bytes[scratch2.offset + i] == '/') {
+          if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR) {
             scratch2.length = i;
             break;
           }
@@ -648,7 +706,7 @@ public class Dictionary {
       String entry;
       char wordForm[];
 
-      int flagSep = line.lastIndexOf('/');
+      int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
       if (flagSep == -1) {
         wordForm = NOFLAGS;
         entry = line;
@@ -738,7 +796,9 @@ public class Dictionary {
       final int count = Integer.parseInt(ruleArgs[1]);
       aliases = new String[count];
     } else {
-      aliases[aliasCount++] = ruleArgs[1];
+      // an alias can map to no flags
+      String aliasValue = ruleArgs.length == 1 ? "" : ruleArgs[1];
+      aliases[aliasCount++] = aliasValue;
     }
   }
 
@@ -753,7 +813,7 @@ public class Dictionary {
   /**
    * Abstraction of the process of parsing flags taken from the affix and dic files
    */
-  private static abstract class FlagParsingStrategy {
+  static abstract class FlagParsingStrategy {
 
     /**
      * Parses the given String into a single flag
@@ -828,6 +888,9 @@ public class Dictionary {
       }
 
       StringBuilder builder = new StringBuilder();
+      if (rawFlags.length() % 2 == 1) {
+        throw new IllegalArgumentException("Invalid flags (should be even number of characters): " + rawFlags);
+      }
       for (int i = 0; i < rawFlags.length(); i+=2) {
         char cookedFlag = (char) ((int) rawFlags.charAt(i) + (int) rawFlags.charAt(i + 1));
         builder.append(cookedFlag);
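Note on the escaping change above (LUCENE-5497): each dictionary line is rewritten before the flag split, so an escaped slash stays in the term while a bare slash becomes an internal separator. A minimal self-contained sketch of that scheme; the class name and main() are illustrative only, but the loop mirrors unescapeEntry from the patch:

    // Sketch: assumes only java.lang; FLAG_SEPARATOR mirrors Dictionary.FLAG_SEPARATOR.
    public class UnescapeDemo {
      static final char FLAG_SEPARATOR = 0x1f; // flag separator after escaping

      // "\/" keeps a literal slash in the term; a bare "/" becomes the separator
      static String unescapeEntry(String entry) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < entry.length(); i++) {
          char ch = entry.charAt(i);
          if (ch == '\\' && i + 1 < entry.length()) {
            sb.append(entry.charAt(i + 1));
            i++;
          } else if (ch == '/') {
            sb.append(FLAG_SEPARATOR);
          } else {
            sb.append(ch);
          }
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        String entry = unescapeEntry("R2\\/D2/A"); // the raw .dic line is R2\/D2/A
        int flagSep = entry.lastIndexOf(FLAG_SEPARATOR);
        System.out.println(entry.substring(0, flagSep));  // R2/D2 (the term, slash intact)
        System.out.println(entry.substring(flagSep + 1)); // A (the flag part)
      }
    }

Mapping to 0x1f, an ASCII control character, presumably makes collisions with real dictionary content unlikely, which is why lastIndexOf can then split terms from flags safely.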
"dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff", "albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff", "amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff", -//BUG! "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff", -//BUG! "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff", + "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff", + "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff", "azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff", "belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff", "belarusian_dictionary-0.1.2-fx+sm+tb.xpi", "dictionaries/be.dic", "dictionaries/be.aff", @@ -72,13 +72,13 @@ public class TestAllDictionaries2 extends LuceneTestCase { "diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi", "dictionaries/es_AR.dic", "dictionaries/es_AR.aff", "diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi", "dictionaries/es_MX.dic", "dictionaries/es_MX.aff", "diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi", "dictionaries/roa-ES-val.dic", "dictionaries/roa-ES-val.aff", -//BUG! "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff", + "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff", "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff", "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic.dic", "dictionaries/fr-classic.aff", "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-modern.dic", "dictionaries/fr-modern.aff", "dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-reform.dic", "dictionaries/fr-reform.aff", "difazier_an_drouizig-0.12-tb+sm+fx.xpi", "dictionaries/br.dic", "dictionaries/br.aff", -//BUG! "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff", + "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff", "dizionari_furlan-3.1-tb+fx+sm.xpi", "dictionaries/fur-IT.dic", "dictionaries/fur-IT.aff", "dizionario_italiano-3.3.2-fx+sm+tb.xpi", "dictionaries/it_IT.dic", "dictionaries/it_IT.aff", "eesti_keele_speller-3.2-fx+tb+sm.xpi", "dictionaries/et-EE.dic", "dictionaries/et-EE.aff", @@ -101,10 +101,10 @@ public class TestAllDictionaries2 extends LuceneTestCase { "hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff", "hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff", "hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff", -//BUG! "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu_HU.dic", "dictionaries/hu_HU.aff", -//BUG! 
"icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff", + "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu.dic", "dictionaries/hu.aff", +//BUG: has no encoding declaration "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff", "kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff", -//BUG! "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff", + "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff", "kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi", "dictionaries/Kaszebsczi.dic", "dictionaries/Kaszebsczi.aff", "kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi", "dictionaries/sw_TZ.dic", "dictionaries/sw_TZ.aff", "kurdish_spell_checker-0.96-fx+tb+sm.xpi", "dictionaries/ku-TR.dic", "dictionaries/ku-TR.aff", @@ -113,8 +113,8 @@ public class TestAllDictionaries2 extends LuceneTestCase { "lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi", "dictionaries/lt.dic", "dictionaries/lt.aff", "litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi", "dictionaries/ga.dic", "dictionaries/ga.aff", "litreoir_na_liongailise-0.03-fx+sm+tb.xpi", "dictionaries/ln-CD.dic", "dictionaries/ln-CD.aff", -//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff", -//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff", + "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff", + "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff", "malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/mg_MG.dic", "dictionaries/mg_MG.aff", "marathi_dictionary-9.3-sm+tb+sb+fx.xpi", "dictionaries/mr-IN.dic", "dictionaries/mr-IN.aff", "ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nr-ZA.dic", "dictionaries/nr-ZA.aff", @@ -125,8 +125,8 @@ public class TestAllDictionaries2 extends LuceneTestCase { "oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/or-IN.dic", "dictionaries/or-IN.aff", "polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi", "dictionaries/pl.dic", "dictionaries/pl.aff", "punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi", "dictionaries/pa-IN.dic", "dictionaries/pa-IN.aff", -//BUG! "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff", -//BUG! 
"russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff", + "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff", + "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff", "sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi", "dictionaries/sa_IN.dic", "dictionaries/sa_IN.aff", "scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi", "dictionaries/gd-GB.dic", "dictionaries/gd-GB.aff", "serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Cyrl.dic", "dictionaries/sr-RS-Cyrl.aff", @@ -146,22 +146,22 @@ public class TestAllDictionaries2 extends LuceneTestCase { "telugu_spell_checker-0.3-tb+fx+sm.xpi", "dictionaries/te_IN.dic", "dictionaries/te_IN.aff", "te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi-x-Tai Tokerau.dic", "dictionaries/mi-x-Tai Tokerau.aff", "te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi.dic", "dictionaries/mi.aff", -//BUG! "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff", +//BUG: broken file (hunspell refuses to load, too) "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff", "tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff", "tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff", "turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff", -//BUG! "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff", + "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff", "ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff", "united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff", "upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi", "dictionaries/hsb.dic", "dictionaries/hsb.aff", -//BUG! "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff", + "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff", "uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/uz.dic", "dictionaries/uz.aff", "valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi", "dictionaries/ca-ES-valencia.dic", "dictionaries/ca-ES-valencia.aff", "venda_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/ve-ZA.dic", "dictionaries/ve-ZA.aff", "verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff", "vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.dic", "dictionaries/vi-DauCu.aff", "vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff", -//BUG! 
"woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff", + "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff", "xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff", "xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff", "yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff", @@ -196,7 +196,7 @@ public class TestAllDictionaries2 extends LuceneTestCase { } public void testOneDictionary() throws Exception { - String toTest = "hausa_spelling_dictionary-0.2-tb+fx.xpi"; + String toTest = "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi"; for (int i = 0; i < tests.length; i++) { if (tests[i].equals(toTest)) { File f = new File(DICTIONARY_HOME, tests[i]); @@ -210,7 +210,7 @@ public class TestAllDictionaries2 extends LuceneTestCase { try (InputStream dictionary = zip.getInputStream(dicEntry); InputStream affix = zip.getInputStream(affEntry)) { - new Dictionary(affix, dictionary); + new Dictionary(affix, dictionary); } } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java index 5d7682e88c0..a653ddb36a2 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java @@ -17,6 +17,7 @@ package org.apache.lucene.analysis.hunspell; * limitations under the License. */ +import java.io.ByteArrayInputStream; import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; @@ -24,6 +25,7 @@ import java.text.ParseException; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.fst.Builder; @@ -77,6 +79,40 @@ public class TestDictionary extends LuceneTestCase { affixStream.close(); dictStream.close(); } + + public void testCompressedBeforeSetDictionary() throws Exception { + InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff"); + InputStream dictStream = getClass().getResourceAsStream("compressed.dic"); + + Dictionary dictionary = new Dictionary(affixStream, dictStream); + assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length); + assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length); + IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3); + BytesRef ref = new BytesRef(); + dictionary.flagLookup.get(ordList.ints[0], ref); + char flags[] = Dictionary.decodeFlags(ref); + assertEquals(1, flags.length); + + affixStream.close(); + dictStream.close(); + } + + public void testCompressedEmptyAliasDictionary() throws Exception { + InputStream affixStream = getClass().getResourceAsStream("compressed-empty-alias.aff"); + InputStream dictStream = getClass().getResourceAsStream("compressed.dic"); + + Dictionary dictionary = new Dictionary(affixStream, dictStream); + assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length); + assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length); + IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3); + BytesRef ref = new BytesRef(); + dictionary.flagLookup.get(ordList.ints[0], ref); + char flags[] = 
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
index 5d7682e88c0..a653ddb36a2 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestDictionary.java
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.hunspell;
  * limitations under the License.
  */
 
+import java.io.ByteArrayInputStream;
 import java.io.FilterInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -24,6 +25,7 @@ import java.text.ParseException;
 
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CharsRef;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.fst.Builder;
@@ -77,6 +79,40 @@ public class TestDictionary extends LuceneTestCase {
     affixStream.close();
     dictStream.close();
   }
+
+  public void testCompressedBeforeSetDictionary() throws Exception {
+    InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
+    InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
+
+    Dictionary dictionary = new Dictionary(affixStream, dictStream);
+    assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
+    assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
+    IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
+    BytesRef ref = new BytesRef();
+    dictionary.flagLookup.get(ordList.ints[0], ref);
+    char flags[] = Dictionary.decodeFlags(ref);
+    assertEquals(1, flags.length);
+
+    affixStream.close();
+    dictStream.close();
+  }
+
+  public void testCompressedEmptyAliasDictionary() throws Exception {
+    InputStream affixStream = getClass().getResourceAsStream("compressed-empty-alias.aff");
+    InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
+
+    Dictionary dictionary = new Dictionary(affixStream, dictStream);
+    assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
+    assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
+    IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
+    BytesRef ref = new BytesRef();
+    dictionary.flagLookup.get(ordList.ints[0], ref);
+    char flags[] = Dictionary.decodeFlags(ref);
+    assertEquals(1, flags.length);
+
+    affixStream.close();
+    dictStream.close();
+  }
 
   // malformed rule causes ParseException
   public void testInvalidData() throws Exception {
@@ -87,7 +123,7 @@ public class TestDictionary extends LuceneTestCase {
       new Dictionary(affixStream, dictStream);
       fail("didn't get expected exception");
     } catch (ParseException expected) {
-      assertEquals("The affix file contains a rule with less than five elements", expected.getMessage());
+      assertTrue(expected.getMessage().startsWith("The affix file contains a rule with less than four elements"));
       assertEquals(24, expected.getErrorOffset());
     }
 
@@ -178,4 +214,16 @@ public class TestDictionary extends LuceneTestCase {
     Dictionary.applyMappings(fst, sb);
     assertEquals("ghghghde", sb.toString());
   }
+
+  public void testSetWithCrazyWhitespaceAndBOMs() throws Exception {
+    assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
+    assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\t UTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
+    assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
+    assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\r\n".getBytes(IOUtils.CHARSET_UTF_8))));
+  }
+
+  public void testFlagWithCrazyWhitespace() throws Exception {
+    assertNotNull(Dictionary.getFlagParsingStrategy("FLAG\tUTF-8"));
+    assertNotNull(Dictionary.getFlagParsingStrategy("FLAG    UTF-8"));
+  }
 }
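The new testSetWithCrazyWhitespaceAndBOMs cases above exercise Dictionary.getDictionaryEncoding, whose essential behavior is: an optional UTF-8 BOM (read byte-by-byte, so it appears as the three chars \u00EF\u00BB\u00BF), then SET, then any whitespace. A standalone sketch reusing the regex from the patch; the class and method names here are illustrative only:

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class EncodingLineDemo {
      // same pattern as Dictionary.ENCODING_PATTERN in the patch
      static final Pattern ENCODING_PATTERN = Pattern.compile("^(\u00EF\u00BB\u00BF)?SET\\s+");

      static String parse(String line) {
        Matcher matcher = ENCODING_PATTERN.matcher(line);
        if (matcher.find()) {
          return line.substring(matcher.end()).trim(); // whatever follows SET is the charset name
        }
        return null; // not a SET line
      }

      public static void main(String[] args) {
        System.out.println(parse("SET\tUTF-8"));                  // UTF-8 (tab, not space)
        System.out.println(parse("\u00EF\u00BB\u00BFSET UTF-8")); // UTF-8 (leading BOM bytes)
        System.out.println(parse("SETUTF-8"));                    // null: whitespace after SET required
      }
    }

The old code required the literal prefix "SET " (one ASCII space), which is why tab-separated or BOM-prefixed dictionaries failed before this change.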
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestEscaped.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestEscaped.java
new file mode 100644
index 00000000000..d5b1489b3fc
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestEscaped.java
@@ -0,0 +1,36 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestEscaped extends StemmerTestBase {
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    init("escaped.aff", "escaped.dic");
+  }
+
+  public void testStemming() {
+    assertStemsTo("works", "work");
+    assertStemsTo("work", "work");
+    assertStemsTo("R2/D2", "R2/D2");
+    assertStemsTo("R2/D2s", "R2/D2");
+    assertStemsTo("N/A", "N/A");
+    assertStemsTo("N/As");
+  }
+}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
index f42afcfa9cc..5656f6ee753 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.hunspell.Dictionary;
 import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.util.IOUtils;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
@@ -39,9 +40,13 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
 
   @BeforeClass
   public static void beforeClass() throws Exception {
-    try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
-        InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
+    // no multiple try-with to workaround bogus VerifyError
+    InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
+    InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
+    try {
       dictionary = new Dictionary(affixStream, dictStream);
+    } finally {
+      IOUtils.closeWhileHandlingException(affixStream, dictStream);
     }
   }
 
@@ -97,9 +102,13 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
 
   public void testIgnoreCaseNoSideEffects() throws Exception {
     final Dictionary d;
-    try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
-        InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
+    // no multiple try-with to workaround bogus VerifyError
+    InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
+    InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
+    try {
       d = new Dictionary(affixStream, Collections.singletonList(dictStream), true);
+    } finally {
+      IOUtils.closeWhileHandlingException(affixStream, dictStream);
     }
     Analyzer a = new Analyzer() {
       @Override
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOptionalCondition.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOptionalCondition.java
new file mode 100644
index 00000000000..bddee500347
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestOptionalCondition.java
@@ -0,0 +1,42 @@
+package org.apache.lucene.analysis.hunspell;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.BeforeClass;
+
+public class TestOptionalCondition extends StemmerTestBase {
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    init("optional-condition.aff", "condition.dic");
+  }
+
+  public void testStemming() {
+    assertStemsTo("hello", "hello");
+    assertStemsTo("try", "try");
+    assertStemsTo("tried", "try");
+    assertStemsTo("work", "work");
+    assertStemsTo("worked", "work");
+    assertStemsTo("rework", "work");
+    assertStemsTo("reworked", "work");
+    assertStemsTo("retried");
+    assertStemsTo("workied");
+    assertStemsTo("tryed");
+    assertStemsTo("tryied");
+    assertStemsTo("helloed");
+  }
+}
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken.aff
index 3b780cd1d7b..81741793719 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken.aff
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/broken.aff
@@ -19,6 +19,6 @@ SFX E 0 d o
 PFX B Y 1
 PFX B 0 s o
 
-#wrong rule (only 4 elements)
+#wrong rule (only 3 elements)
 PFX A0 Y 1
-PFX A0 0 a
\ No newline at end of file
+PFX A0 0
\ No newline at end of file
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed-before-set.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed-before-set.aff
new file mode 100644
index 00000000000..e4a1b37300f
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed-before-set.aff
@@ -0,0 +1,29 @@
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+FLAG long
+
+AF 5
+AF AA
+AF BB
+AF CC
+AF DD
+AF EE
+
+SFX AA Y 3
+SFX AA 0 e n
+SFX AA 0 e t
+SFX AA 0 e h
+
+SFX CC Y 2
+SFX CC 0 d/3 c
+SFX CC 0 c b
+
+SFX DD Y 1
+SFX DD 0 s o
+
+SFX EE Y 1
+SFX EE 0 d o
+
+PFX BB Y 1
+PFX BB 0 s o
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed-empty-alias.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed-empty-alias.aff
new file mode 100644
index 00000000000..a27273fc714
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed-empty-alias.aff
@@ -0,0 +1,30 @@
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+FLAG long
+
+AF 6
+AF AA
+AF BB
+AF CC
+AF DD
+AF EE
+AF
+
+SFX AA Y 3
+SFX AA 0 e n
+SFX AA 0 e t
+SFX AA 0 e h
+
+SFX CC Y 2
+SFX CC 0 d/3 c
+SFX CC 0 c b
+
+SFX DD Y 1
+SFX DD 0 s o
+
+SFX EE Y 1
+SFX EE 0 d o
+
+PFX BB Y 1
+PFX BB 0 s o
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed.aff
index e4a1b37300f..c747c27ef80 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed.aff
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/compressed.aff
@@ -1,8 +1,3 @@
-SET UTF-8
-TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
-
-FLAG long
-
 AF 5
 AF AA
 AF BB
@@ -10,6 +5,11 @@ AF CC
 AF DD
 AF EE
 
+SET UTF-8
+TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
+
+FLAG long
+
 SFX AA Y 3
 SFX AA 0 e n
 SFX AA 0 e t
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/escaped.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/escaped.aff
new file mode 100644
index 00000000000..b42845175e2
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/escaped.aff
@@ -0,0 +1,4 @@
+SET UTF-8
+
+SFX A Y 1
+SFX A 0 s . +PLUR
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/escaped.dic b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/escaped.dic
new file mode 100644
index 00000000000..93602944260
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/escaped.dic
@@ -0,0 +1,4 @@
+3
+work/A
+R2\/D2/A
+N\/A
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/optional-condition.aff b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/optional-condition.aff
new file mode 100644
index 00000000000..f2e3b9c2c7c
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/optional-condition.aff
@@ -0,0 +1,14 @@
+SET UTF-8
+TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ’
+
+REP 2
+REP f ph
+REP ph f
+
+# has no condition
+PFX A Y 1
+PFX A 0 re
+
+SFX B Y 2
+SFX B 0 ed [^y]
+SFX B y ied y
\ No newline at end of file
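For orientation, this is roughly how the escaped.aff/escaped.dic pair above is consumed. A sketch only: it assumes it lives in the org.apache.lucene.analysis.hunspell package (Stemmer is not public outside it), that the test resources are on the classpath, and that Stemmer.stem(String) behaves as the tests above imply:

    package org.apache.lucene.analysis.hunspell;

    import java.io.InputStream;
    import java.util.List;

    import org.apache.lucene.util.CharsRef;
    import org.apache.lucene.util.IOUtils;

    public class EscapedDictionaryDemo {
      public static void main(String[] args) throws Exception {
        // plain try/finally, matching the patch's own workaround for the
        // multi-resource try-with VerifyError noted in TestHunspellStemFilter
        InputStream affix = EscapedDictionaryDemo.class.getResourceAsStream("escaped.aff");
        InputStream dic = EscapedDictionaryDemo.class.getResourceAsStream("escaped.dic");
        try {
          Dictionary dictionary = new Dictionary(affix, dic);
          Stemmer stemmer = new Stemmer(dictionary);
          // escaped.dic declares "R2\/D2/A": the literal term "R2/D2" carrying
          // flag A, and escaped.aff makes flag A a plural "s" suffix:
          List<CharsRef> stems = stemmer.stem("R2/D2s");
          System.out.println(stems); // expected: one stem, R2/D2
        } finally {
          IOUtils.closeWhileHandlingException(affix, dic);
        }
      }
    }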
diff --git a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java
index 8884a546b43..89b92a5bb22 100644
--- a/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/FakeScorer.java
@@ -19,7 +19,7 @@ package org.apache.lucene.search;
 
 import java.util.Collection;
 
-/** Used by {@link BulkScorers} that need to pass a {@link
+/** Used by {@link BulkScorer}s that need to pass a {@link
 *  Scorer} to {@link Collector#setScorer}. */
 final class FakeScorer extends Scorer {
   float score;
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
index 2a8aada85e8..59bcd81a732 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene40/TestReuseDocsEnum.java
@@ -26,12 +26,13 @@ import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.Bits.MatchNoBits;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LineFileDocs;
@@ -121,8 +122,11 @@ public class TestReuseDocsEnum extends LuceneTestCase {
   public void testReuseDocsEnumDifferentReader() throws IOException {
     Directory dir = newDirectory();
     Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
+        newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(cp));
     int numdocs = atLeast(20);
     createRandomIndex(numdocs, writer, random());
     writer.commit();
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
index 91d6c61a5f9..ca8817e5a0f 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@@ -139,8 +139,11 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     mp.setUseCompoundFile(false);
     mp.setNoCFSRatio(1.0);
     mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
     // TODO: remove randomness
-    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
+    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
       .setMergePolicy(mp);
     conf.setCodec(Codec.forName("Lucene40"));
     IndexWriter writer = new IndexWriter(dir, conf);
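Most of the remaining core test changes in this patch repeat the same three-line edit: give MockAnalyzer a random maximum token length capped by IndexWriter's per-term limit, so randomly generated documents can exercise long terms without tripping the writer's hard bound. Distilled into a runnable form, with a plain java.util.Random standing in for LuceneTestCase.random():

    import java.util.Random;

    import org.apache.lucene.analysis.MockAnalyzer;
    import org.apache.lucene.index.IndexWriter;

    public class RandomTokenLengthDemo {
      public static void main(String[] args) {
        Random random = new Random();
        MockAnalyzer analyzer = new MockAnalyzer(random);
        // pick a cap in [1, IndexWriter.MAX_TERM_LENGTH]: tokens get truncated
        // at a random length but never exceed what IndexWriter accepts
        int maxTokenLength = 1 + random.nextInt(IndexWriter.MAX_TERM_LENGTH);
        analyzer.setMaxTokenLength(maxTokenLength);
        System.out.println("max token length for this run: " + maxTokenLength);
      }
    }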
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java b/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java
index 2a0528e2b46..aaeeca302dd 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestCustomNorms.java
@@ -30,6 +30,7 @@ import org.apache.lucene.search.similarities.Similarity;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
 
 /**
  *
 */
@@ -41,8 +42,11 @@ public class TestCustomNorms extends LuceneTestCase {
 
   public void testFloatNorms() throws IOException {
     Directory dir = newDirectory();
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
     IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
-        new MockAnalyzer(random()));
+        analyzer);
     Similarity provider = new MySimProvider();
     config.setSimilarity(provider);
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
index 3da7cb06525..173a3a98aee 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDuelingCodecs.java
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import java.util.Random;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.document.Document;
@@ -63,9 +62,12 @@ public class TestDuelingCodecs extends LuceneTestCase {
     long seed = random().nextLong();
 
     // must use same seed because of random payloads, etc
-    Analyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
-    Analyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
-
+    int maxTermLength = TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH);
+    MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
+    leftAnalyzer.setMaxTokenLength(maxTermLength);
+    MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
+    rightAnalyzer.setMaxTokenLength(maxTermLength);
+
     // but these can be different
     // TODO: this turns this into a really big test of Multi*, is that what we want?
     IndexWriterConfig leftConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer);
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java
index dd9406925ce..7960b0ce650 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestFlushByRamOrCountsPolicy.java
@@ -29,6 +29,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.LineFileDocs;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
@@ -64,8 +65,11 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
     AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
     Directory dir = newDirectory();
     MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
     IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
-        new MockAnalyzer(random())).setFlushPolicy(flushPolicy);
+        analyzer).setFlushPolicy(flushPolicy);
     final int numDWPT = 1 + atLeast(2);
     DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(
         numDWPT);
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java b/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java
index 53be31c7757..31228877e64 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestForceMergeForever.java
@@ -54,7 +54,10 @@ public class TestForceMergeForever extends LuceneTestCase {
 
   public void test() throws Exception {
     final Directory d = newDirectory();
-    final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+    final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
 
     // Try to make an index that requires merging:
     w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOutOfFileDescriptors.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOutOfFileDescriptors.java
index a1e27bba00c..9e2c45896c5 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOutOfFileDescriptors.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterOutOfFileDescriptors.java
@@ -51,7 +51,9 @@ public class TestIndexWriterOutOfFileDescriptors extends LuceneTestCase {
         System.out.println("TEST: iter=" + iter);
       }
       try {
-        IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+        MockAnalyzer analyzer = new MockAnalyzer(random());
+        analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+        IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
 
         if (VERBOSE) {
           // Do this ourselves instead of relying on LTC so
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
index 4e40b4159fa..00ebb551db5 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java
@@ -548,7 +548,10 @@ public class TestIndexWriterWithThreads extends LuceneTestCase {
     final int threadCount = TestUtil.nextInt(random(), 2, 6);
 
     final AtomicReference<IndexWriter> writerRef = new AtomicReference<>();
-    writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))));
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+    writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)));
     final LineFileDocs docs = new LineFileDocs(random());
     final Thread[] threads = new Thread[threadCount];
     final int iters = atLeast(100);
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestNorms.java b/lucene/core/src/test/org/apache/lucene/index/TestNorms.java
index 7a0cdb2faef..29fba3b5871 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestNorms.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestNorms.java
@@ -75,7 +75,10 @@ public class TestNorms extends LuceneTestCase {
   // LUCENE-1260
   public void testCustomEncoder() throws Exception {
     Directory dir = newDirectory();
-    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+    IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
     config.setSimilarity(new CustomNormEncodingSimilarity());
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
     Document doc = new Document();
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java b/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
index 008dd854ef8..a5b9e5b81eb 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
@@ -46,7 +46,10 @@ public class TestRollingUpdates extends LuceneTestCase {
       Codec.setDefault(TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
     }
 
-    final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+
+    final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
     final int SIZE = atLeast(20);
     int id = 0;
     IndexReader r = null;
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
index 011bb8a03ca..1385f3e825b 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
@@ -44,7 +44,9 @@ public class TestTermsEnum extends LuceneTestCase {
     Random random = new Random(random().nextLong());
     final LineFileDocs docs = new LineFileDocs(random, true);
     final Directory d = newDirectory();
-    final RandomIndexWriter w = new RandomIndexWriter(random(), d);
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+    final RandomIndexWriter w = new RandomIndexWriter(random(), d, analyzer);
     final int numDocs = atLeast(10);
     for(int docCount=0;docCount<numDocs;docCount++) {
     List<Term> emptyTerms = Collections.emptyList();
     List<BytesRef> emptyBytesRef = Collections.emptyList();
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
index d48d5e12ace..d34ce2f293f 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
@@ -1379,7 +1379,9 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
   // during flush/merge
   public void testInvertedWrite() throws Exception {
     Directory dir = newDirectory();
-    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    MockAnalyzer analyzer = new MockAnalyzer(random());
+    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
 
     // Must be concurrent because thread(s) can be merging
     // while up to one thread flushes, and each of those
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkOutOfOrderScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkOutOfOrderScorer.java
new file mode 100644
index 00000000000..0b2fa34b044
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkOutOfOrderScorer.java
@@ -0,0 +1,110 @@
+package org.apache.lucene.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.lang.ref.WeakReference;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Random;
+import java.util.WeakHashMap;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.util.VirtualMethod;
+
+/** A crazy {@link BulkScorer} that wraps a {@link Scorer}
+ *  but shuffles the order of the collected documents. */
+public class AssertingBulkOutOfOrderScorer extends BulkScorer {
+
+  final Random random;
+  final Scorer scorer;
+
+  public AssertingBulkOutOfOrderScorer(Random random, Scorer scorer) {
+    this.random = random;
+    this.scorer = scorer;
+  }
+
+  private void shuffle(int[] docIDs, float[] scores, int[] freqs, int size) {
+    for (int i = size - 1; i > 0; --i) {
+      final int other = random.nextInt(i + 1);
+
+      final int tmpDoc = docIDs[i];
+      docIDs[i] = docIDs[other];
+      docIDs[other] = tmpDoc;
+
+      final float tmpScore = scores[i];
+      scores[i] = scores[other];
+      scores[other] = tmpScore;
+
+      final int tmpFreq = freqs[i];
+      freqs[i] = freqs[other];
+      freqs[other] = tmpFreq;
+    }
+  }
+
+  private static void flush(int[] docIDs, float[] scores, int[] freqs, int size,
+      FakeScorer scorer, Collector collector) throws IOException {
+    for (int i = 0; i < size; ++i) {
+      scorer.doc = docIDs[i];
+      scorer.freq = freqs[i];
+      scorer.score = scores[i];
+      collector.collect(scorer.doc);
+    }
+  }
+
+  @Override
+  public boolean score(Collector collector, int max) throws IOException {
+    if (scorer.docID() == -1) {
+      scorer.nextDoc();
+    }
+
+    FakeScorer fake = new FakeScorer();
+    collector.setScorer(fake);
+
+    final int bufferSize = 1 + random.nextInt(100);
+    final int[] docIDs = new int[bufferSize];
+    final float[] scores = new float[bufferSize];
+    final int[] freqs = new int[bufferSize];
+
+    int buffered = 0;
+    int doc = scorer.docID();
+    while (doc < max) {
+      docIDs[buffered] = doc;
+      scores[buffered] = scorer.score();
+      freqs[buffered] = scorer.freq();
+
+      if (++buffered == bufferSize) {
+        shuffle(docIDs, scores, freqs, buffered);
+        flush(docIDs, scores, freqs, buffered, fake, collector);
+        buffered = 0;
+      }
+      doc = scorer.nextDoc();
+    }
+
+    shuffle(docIDs, scores, freqs, buffered);
+    flush(docIDs, scores, freqs, buffered, fake, collector);
+
+    return doc != Scorer.NO_MORE_DOCS;
+  }
+
+  @Override
+  public String toString() {
+    return "AssertingBulkOutOfOrderScorer(" + scorer + ")";
+  }
+}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java
index 050dfa477c5..995f49aee1d 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingBulkScorer.java
@@ -34,18 +34,11 @@ public class AssertingBulkScorer extends BulkScorer {
 
   private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR = new VirtualMethod<>(BulkScorer.class, "score", Collector.class);
   private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR_RANGE = new VirtualMethod<>(BulkScorer.class, "score", Collector.class, int.class);
 
-  // we need to track scorers using a weak hash map because otherwise we
-  // could loose references because of eg.
-  // AssertingScorer.score(Collector) which needs to delegate to work correctly
-  private static Map<BulkScorer, WeakReference<AssertingBulkScorer>> ASSERTING_INSTANCES = Collections.synchronizedMap(new WeakHashMap<BulkScorer, WeakReference<AssertingBulkScorer>>());
-
   public static BulkScorer wrap(Random random, BulkScorer other) {
     if (other == null || other instanceof AssertingBulkScorer) {
       return other;
     }
-    final AssertingBulkScorer assertScorer = new AssertingBulkScorer(random, other);
-    ASSERTING_INSTANCES.put(other, new WeakReference<AssertingBulkScorer>(assertScorer));
-    return assertScorer;
+    return new AssertingBulkScorer(random, other);
   }
 
   public static boolean shouldWrap(BulkScorer inScorer) {
@@ -87,4 +80,5 @@ public class AssertingBulkScorer extends BulkScorer {
   public String toString() {
     return "AssertingBulkScorer(" + in + ")";
   }
+
 }
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
index e00dc75ad2b..793b396ab7c 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/AssertingWeight.java
@@ -29,12 +29,14 @@ class AssertingWeight extends Weight {
     return other instanceof AssertingWeight ? other : new AssertingWeight(random, other);
   }
 
+  final boolean scoresDocsOutOfOrder;
   final Random random;
   final Weight in;
 
   AssertingWeight(Random random, Weight in) {
     this.random = random;
     this.in = in;
+    scoresDocsOutOfOrder = in.scoresDocsOutOfOrder() || random.nextBoolean();
   }
 
   @Override
@@ -73,8 +75,21 @@ class AssertingWeight extends Weight {
     if (inScorer == null) {
       return null;
     }
+
+    if (AssertingBulkScorer.shouldWrap(inScorer)) {
+      // The incoming scorer already has a specialized
+      // implementation for BulkScorer, so we should use it:
       return AssertingBulkScorer.wrap(new Random(random.nextLong()), inScorer);
+    } else if (scoreDocsInOrder == false && random.nextBoolean()) {
+      // The caller claims it can handle out-of-order
+      // docs; let's confirm that by pulling docs and
+      // randomly shuffling them before collection:
+      //Scorer scorer = in.scorer(context, acceptDocs);
+      Scorer scorer = scorer(context, acceptDocs);
+
+      // Scorer should not be null if bulkScorer wasn't:
+      assert scorer != null;
+      return new AssertingBulkOutOfOrderScorer(new Random(random.nextLong()), scorer);
     } else {
       // Let super wrap this.scorer instead, so we use
       // AssertingScorer:
@@ -84,8 +99,7 @@ class AssertingWeight extends Weight {
 
   @Override
   public boolean scoresDocsOutOfOrder() {
-    return in.scoresDocsOutOfOrder();
+    return scoresDocsOutOfOrder;
   }
-
 }
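What the shuffling wrapper above validates is consumers like the following: a Collector that answers true from acceptsDocsOutOfOrder() must compute the same result no matter what order hits arrive in. A minimal hypothetical example against the 4.x Collector API (the class is illustrative, not part of the patch):

    import java.io.IOException;

    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.Scorer;

    // Counts hits only, so collection order genuinely does not matter.
    public class CountingCollector extends Collector {
      public int count;

      @Override
      public void setScorer(Scorer scorer) throws IOException {
        // no per-hit scoring needed
      }

      @Override
      public void collect(int doc) throws IOException {
        count++; // order-independent
      }

      @Override
      public void setNextReader(AtomicReaderContext context) throws IOException {
        // no per-segment state
      }

      @Override
      public boolean acceptsDocsOutOfOrder() {
        // permits bulk scorers, including AssertingBulkOutOfOrderScorer,
        // to deliver documents in any order
        return true;
      }
    }

This also explains the QueryUtils change just below: its checking collector relies on seeing docs in increasing order, so it must now honestly report false.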
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
index 3ecf5713ad7..cd00f74a12d 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
@@ -329,7 +329,7 @@ public class QueryUtils {
 
         @Override
         public boolean acceptsDocsOutOfOrder() {
-          return true;
+          return false;
         }
       });
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java b/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
index 2b0a6130caa..b0f46f4c34a 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
@@ -449,7 +449,9 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
       myNodeID = nodeID;
       dir = newFSDirectory(TestUtil.getTempDir("ShardSearchingTestBase"));
       // TODO: set warmer
-      IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
+      MockAnalyzer analyzer = new MockAnalyzer(random());
+      analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
+      IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
       iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
       if (VERBOSE) {
         iwc.setInfoStream(new PrintStreamInfoStream(System.out));
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index decef0fa9f0..773540fed95 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -138,6 +138,12 @@ Bug Fixes
 * SOLR-5818: distrib search with custom comparator does not quite work
   correctly (Ryan Ernst)
 
+* SOLR-5834: Overseer threads are only being interrupted and not closed.
+  (hossman, Mark Miller)
+
+* SOLR-5839: ZookeeperInfoServlet does not trim path properly.
+  (Furkan KAMACI via Mark Miller)
+
 Optimizations
 ----------------------
 * SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY
@@ -181,6 +187,13 @@ Other Changes
 
 * SOLR-5796: Make how long we are willing to wait for a core to see the ZK
   advertised leader in it's local state configurable.
   (Timothy Potter via Mark Miller)
+
+* SOLR-5825: Separate http request creating and execution in SolrJ
+  (Steven Bower via Erick Erickson)
+
+* SOLR-5837: Add hashCode/equals to SolrDocument, SolrInputDocument
+  and SolrInputField for testing purposes. (Varun Thacker, Noble Paul,
+  Mark Miller)
 
 ================== 4.7.0 ==================
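The Overseer patch that follows (SOLR-5834) fixes leaked resources by having OverseerThread keep a typed reference to the ClosableThread it runs, so that close() can delegate to the worker instead of only flipping a flag. A self-contained sketch of that wrapper pattern, using stand-in names rather than Solr's real classes:

    // Self-contained sketch of the wrapper pattern used by OverseerThread:
    // the Thread subclass keeps a typed reference to the Runnable it runs,
    // so close() can reach the worker's own shutdown logic.
    interface Closable extends Runnable {   // stand-in for Solr's ClosableThread
      void close();
      boolean isClosed();
    }

    class WorkerThread extends Thread {
      private final Closable worker;
      protected volatile boolean isClosed;

      WorkerThread(ThreadGroup tg, Closable worker, String name) {
        super(tg, worker, name);             // the thread runs the worker...
        this.worker = worker;                // ...and remembers it for close()
      }

      public void close() {
        worker.close();                      // let the worker release its resources
        isClosed = true;                     // then mark the wrapper closed
      }
    }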
(Varun Thacker, Noble Paul, + Mark Miller) ================== 4.7.0 ================== diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java index 0577d48beab..0704ccccd49 100644 --- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java +++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java @@ -81,8 +81,8 @@ public class Overseer { //Internal queue where overseer stores events that have not yet been published into cloudstate //If Overseer dies while extracting the main queue a new overseer will start from this queue private final DistributedQueue workQueue; - private volatile boolean isClosed; private Map clusterProps; + private boolean isClosed = false; public ClusterStateUpdater(final ZkStateReader reader, final String myId) { this.zkClient = reader.getZkClient(); @@ -1030,20 +1030,22 @@ public class Overseer { class OverseerThread extends Thread implements ClosableThread { - private volatile boolean isClosed; + protected volatile boolean isClosed; + private ClosableThread thread; - public OverseerThread(ThreadGroup tg, - ClusterStateUpdater clusterStateUpdater) { - super(tg, clusterStateUpdater); + public OverseerThread(ThreadGroup tg, ClosableThread thread) { + super(tg, (Runnable) thread); + this.thread = thread; } - public OverseerThread(ThreadGroup ccTg, - OverseerCollectionProcessor overseerCollectionProcessor, String string) { - super(ccTg, overseerCollectionProcessor, string); + public OverseerThread(ThreadGroup ccTg, ClosableThread thread, String name) { + super(ccTg, (Runnable) thread, name); + this.thread = thread; } @Override public void close() { + thread.close(); this.isClosed = true; } @@ -1084,8 +1086,7 @@ public class Overseer { ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process."); ocp = new OverseerCollectionProcessor(reader, id, shardHandler, adminPath); - ccThread = new OverseerThread(ccTg, ocp, - "Overseer-" + id); + ccThread = new OverseerThread(ccTg, ocp, "Overseer-" + id); ccThread.setDaemon(true); updaterThread.start(); diff --git a/solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java b/solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java index 32ad684f09f..917b5cd9c44 100644 --- a/solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java +++ b/solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java @@ -195,7 +195,7 @@ public final class ZookeeperInfoServlet extends HttpServlet { if (path == null) { path = "/"; } else { - path.trim(); + path = path.trim(); if (path.length() == 0) { path = "/"; } diff --git a/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java b/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java index c8ffe4a7bfd..3d1f6ecd2e8 100644 --- a/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java +++ b/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java @@ -23,7 +23,9 @@ import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.search.similarities.DefaultSimilarity; +import org.apache.lucene.util.TestUtil; import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.common.SolrDocument; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; @@ -394,4 +396,106 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 { 
diff --git a/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java b/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java
index c8ffe4a7bfd..3d1f6ecd2e8 100644
--- a/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java
+++ b/solr/core/src/test/org/apache/solr/update/DocumentBuilderTest.java
@@ -23,7 +23,9 @@ import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
+import org.apache.lucene.util.TestUtil;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
@@ -394,4 +396,106 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
     assertNull(h.validateUpdate(add(xml, new String[0])));
   }
 
+  public void testSolrDocumentEquals() {
+    String randomString = TestUtil.randomSimpleString(random());
+
+    SolrDocument doc1 = new SolrDocument();
+    doc1.addField("foo", randomString);
+
+    SolrDocument doc2 = new SolrDocument();
+    doc2.addField("foo", randomString);
+
+    assertTrue(doc1.equals(doc2));
+
+    doc1.addField("foo", "bar");
+
+    assertFalse(doc1.equals(doc2));
+
+    doc1 = new SolrDocument();
+    doc1.addField("bar", randomString);
+
+    assertFalse(doc1.equals(doc2));
+
+    int randomInt = random().nextInt();
+    doc1 = new SolrDocument();
+    doc1.addField("foo", randomInt);
+    doc2 = new SolrDocument();
+    doc2.addField("foo", randomInt);
+
+    assertTrue(doc1.equals(doc2));
+
+    doc2 = new SolrDocument();
+    doc2.addField("bar", randomInt);
+
+    assertFalse(doc1.equals(doc2));
+  }
+
+  public void testSolrInputDocumentEquality() {
+    String randomString = TestUtil.randomSimpleString(random());
+
+    SolrInputDocument doc1 = new SolrInputDocument();
+    doc1.addField("foo", randomString);
+    SolrInputDocument doc2 = new SolrInputDocument();
+    doc2.addField("foo", randomString);
+
+    assertTrue(doc1.equals(doc2));
+
+    doc1.setDocumentBoost(1.1f);
+    assertFalse(doc1.equals(doc2));
+
+    doc2.setDocumentBoost(1.1f);
+    assertTrue(doc1.equals(doc2));
+
+    doc2.setDocumentBoost(20f);
+    assertFalse(doc1.equals(doc2));
+
+    doc1 = new SolrInputDocument();
+    doc1.addField("foo", randomString);
+    doc2 = new SolrInputDocument();
+    doc2.addField("foo", randomString);
+
+    SolrInputDocument childDoc = new SolrInputDocument();
+    childDoc.addField("foo", "bar");
+
+    doc1.addChildDocument(childDoc);
+    assertFalse(doc1.equals(doc2));
+
+    doc2.addChildDocument(childDoc);
+    assertTrue(doc1.equals(doc2));
+
+    SolrInputDocument childDoc1 = new SolrInputDocument();
+    childDoc1.addField(TestUtil.randomSimpleString(random()), TestUtil.randomSimpleString(random()));
+    doc2.addChildDocument(childDoc1);
+    assertFalse(doc1.equals(doc2));
+  }
+
+  public void testSolrInputFieldEquality() {
+    String randomString = TestUtil.randomSimpleString(random(), 10, 20);
+
+    int val = random().nextInt();
+    SolrInputField sif1 = new SolrInputField(randomString);
+    sif1.setValue(val, 1.0f);
+    SolrInputField sif2 = new SolrInputField(randomString);
+    sif2.setValue(val, 1.0f);
+
+    assertTrue(sif1.equals(sif2));
+
+    sif1.setBoost(2.1f);
+    sif2.setBoost(2.1f);
+    assertTrue(sif1.equals(sif2));
+
+    sif2.setBoost(2.0f);
+    assertFalse(sif1.equals(sif2));
+
+    sif2.setName("foo");
+    assertFalse(sif1.equals(sif2));
+  }
+
 }
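The assertions above exercise equality directly; equally important is that equal documents hash identically, since that is what lets them act as set members or map keys in test code. A short sketch using the new SolrDocument methods (the field values are illustrative):

    import java.util.HashSet;
    import java.util.Set;

    import org.apache.solr.common.SolrDocument;

    public class SolrDocumentEqualityDemo {
      public static void main(String[] args) {
        SolrDocument a = new SolrDocument();
        a.addField("id", "42");
        SolrDocument b = new SolrDocument();
        b.addField("id", "42");

        // Equal field maps mean equal documents and equal hash codes...
        System.out.println(a.equals(b) && a.hashCode() == b.hashCode()); // true

        // ...which is what makes set-based de-duplication work in tests:
        Set<SolrDocument> set = new HashSet<>();
        set.add(a);
        set.add(b);
        System.out.println(set.size()); // 1
      }
    }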
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java
index f1dc3d190fa..7ba878a7317 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/HttpSolrServer.java
@@ -199,8 +199,11 @@ public class HttpSolrServer extends SolrServer {
     return request(request, responseParser);
   }
 
-  public NamedList<Object> request(final SolrRequest request,
-      final ResponseParser processor) throws SolrServerException, IOException {
+  public NamedList<Object> request(final SolrRequest request, final ResponseParser processor) throws SolrServerException, IOException {
+    return executeMethod(createMethod(request), processor);
+  }
+
+  protected HttpRequestBase createMethod(final SolrRequest request) throws IOException, SolrServerException {
     HttpRequestBase method = null;
     InputStream is = null;
     SolrParams params = request.getParams();
@@ -382,6 +385,10 @@ public class HttpSolrServer extends SolrServer {
       throw new SolrServerException("error reading streams", ex);
     }
 
+    return method;
+  }
+
+  protected NamedList<Object> executeMethod(HttpRequestBase method, final ResponseParser processor) throws SolrServerException {
     // XXX client already has this set, is this needed?
     method.getParams().setParameter(ClientPNames.HANDLE_REDIRECTS, followRedirects);
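The point of SOLR-5825's split is that createMethod(...) now returns the fully built HTTP request before executeMethod(...) sends it, so subclasses can intercept or decorate it. A sketch of the kind of extension this enables (the subclass and header name are hypothetical):

    import java.io.IOException;

    import org.apache.http.client.methods.HttpRequestBase;
    import org.apache.solr.client.solrj.SolrRequest;
    import org.apache.solr.client.solrj.SolrServerException;
    import org.apache.solr.client.solrj.impl.HttpSolrServer;

    // Hypothetical subclass: tag every outgoing request with a tracing header.
    public class TracingSolrServer extends HttpSolrServer {

      public TracingSolrServer(String baseURL) {
        super(baseURL);
      }

      @Override
      protected HttpRequestBase createMethod(SolrRequest request)
          throws IOException, SolrServerException {
        // Let the base class build the request, then decorate it before it is sent.
        HttpRequestBase method = super.createMethod(request);
        method.setHeader("X-Trace-Id", java.util.UUID.randomUUID().toString());
        return method;
      }
    }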
diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java
index eae68355b77..50a302c7059 100644
--- a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java
+++ b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java
@@ -213,7 +213,41 @@ public class SolrDocument implements Map<String,Object>, Iterable<Map.Entry<String, Object>>
   public Iterator<Map.Entry<String, Object>> iterator() {
     return _fields.entrySet().iterator();
   }
-  
+
+  /**
+   * This method is implemented for tests and should not be counted
+   * on in production code.
+   *
+   * @lucene.experimental
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof SolrDocument)) {
+      return false;
+    }
+
+    SolrDocument solrDocument = (SolrDocument) o;
+
+    if (!_fields.equals(solrDocument._fields)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * This method is implemented for tests and should not be counted
+   * on in production code.
+   *
+   * @lucene.experimental
+   */
+  @Override
+  public int hashCode() {
+    return _fields.hashCode();
+  }
+
   //-----------------------------------------------------------------------------------------
   // JSTL Helpers
   //-----------------------------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java b/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java
index f731910f196..99ec58e99ef 100644
--- a/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java
+++ b/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java
@@ -275,7 +275,51 @@ public class SolrInputDocument implements Map<String,SolrInputField>, Iterable<SolrInputField>
   public Collection<SolrInputField> values() {
     return _fields.values();
   }
-  
+
+  /**
+   * This method is implemented for tests and should not be counted
+   * on in production code.
+   *
+   * @lucene.experimental
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof SolrInputDocument)) {
+      return false;
+    }
+
+    SolrInputDocument sdoc = (SolrInputDocument) o;
+
+    if (!_fields.equals(sdoc._fields)) {
+      return false;
+    }
+    if (Float.compare(sdoc._documentBoost, _documentBoost) != 0) {
+      return false;
+    }
+    if (_childDocuments != null ? !_childDocuments.equals(sdoc._childDocuments) : sdoc._childDocuments != null) {
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * This method is implemented for tests and should not be counted
+   * on in production code.
+   *
+   * @lucene.experimental
+   */
+  @Override
+  public int hashCode() {
+    int result = _fields.hashCode();
+    result = 31 * result + (_documentBoost != +0.0f ? Float.floatToIntBits(_documentBoost) : 0);
+    result = 31 * result + (_childDocuments != null ? _childDocuments.hashCode() : 0);
+    return result;
+  }
+
   public void addChildDocument(SolrInputDocument child) {
     if (_childDocuments == null) {
       _childDocuments = new ArrayList<>();
diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java b/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java
index 4c1a72940ab..f5cb5d1aff0 100644
--- a/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java
+++ b/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java
@@ -229,4 +229,50 @@ public class SolrInputField implements Iterable<Object>, Serializable
     }
     return clone;
   }
+
+  /**
+   * This method is implemented for tests and should not be counted
+   * on in production code.
+   *
+   * @lucene.experimental
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (!(o instanceof SolrInputField)) {
+      return false;
+    }
+
+    SolrInputField sif = (SolrInputField) o;
+
+    if (!name.equals(sif.name)) {
+      return false;
+    }
+
+    if (!value.equals(sif.value)) {
+      return false;
+    }
+
+    if (Float.compare(sif.boost, boost) != 0) {
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * This method is implemented for tests and should not be counted
+   * on in production code.
+   *
+   * @lucene.experimental
+   */
+  @Override
+  public int hashCode() {
+    int result = name.hashCode();
+    result = 31 * result + value.hashCode();
+    result = 31 * result + (boost != +0.0f ? Float.floatToIntBits(boost) : 0);
+    return result;
+  }
 }
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
index 3e83f24a32f..41e7f54d0c8 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
@@ -521,7 +521,7 @@ public class JavaBinCodec {
       public Object getValue() {
         return value;
       }
-      
+
       @Override
       public String toString() {
        return "MapEntry[" + key.toString() + ":" + value.toString() + "]";
@@ -530,7 +530,28 @@ public class JavaBinCodec {
       @Override
       public Object setValue(Object value) {
        throw new UnsupportedOperationException();
-      }};
+      }
+
+      @Override
+      public int hashCode() {
+        // Follow the Map.Entry contract (key hash XOR value hash) so that
+        // entries that compare equal to other Entry implementations also
+        // hash identically:
+        return getKey().hashCode() ^ getValue().hashCode();
+      }
+
+      @Override
+      public boolean equals(Object obj) {
+        if (this == obj) {
+          return true;
+        }
+        if (!(obj instanceof Entry)) {
+          return false;
+        }
+        Map.Entry<Object, Object> entry = (Entry<Object, Object>) obj;
+        return (this.getKey().equals(entry.getKey()) && this.getValue().equals(entry.getValue()));
+      }
+    };
diff --git a/solr/solrj/src/test-files/solrj/javabin_backcompat.bin b/solr/solrj/src/test-files/solrj/javabin_backcompat.bin
new file mode 100644
index 00000000000..6e9d32ff78f
Binary files /dev/null and b/solr/solrj/src/test-files/solrj/javabin_backcompat.bin differ
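Because the MapEntry equals above accepts any java.util.Map.Entry, its hashCode must follow the Map.Entry contract (key hash XOR value hash); otherwise an entry could compare equal to an AbstractMap.SimpleEntry yet land in a different hash bucket. A quick JDK-only check of that contract:

    import java.util.AbstractMap;
    import java.util.Map;

    public class EntryContractDemo {
      public static void main(String[] args) {
        Map.Entry<Object, Object> entry =
            new AbstractMap.SimpleEntry<Object, Object>("key", "value");

        // Map.Entry mandates hashCode == key.hashCode() ^ value.hashCode(),
        // so any Entry implementation that wants cross-implementation
        // equality must hash exactly this way:
        System.out.println(entry.hashCode() == ("key".hashCode() ^ "value".hashCode())); // true
      }
    }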
diff --git a/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java b/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java
index 1f7eebf3cf8..3145d5181e5 100644
--- a/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java
+++ b/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java
@@ -17,17 +17,37 @@ package org.apache.solr.common.util;
  * limitations under the License.
  */
 
+import java.io.BufferedOutputStream;
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.io.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
+import org.apache.solr.common.EnumFieldValue;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrInputDocument;
+import org.junit.Test;
 
 public class TestJavaBinCodec extends LuceneTestCase {
-  
+
+  private static final String SOLRJ_JAVABIN_BACKCOMPAT_BIN = "/solrj/javabin_backcompat.bin";
+  private final String BIN_FILE_LOCATION = "./solr/solrj/src/test-files/solrj/javabin_backcompat.bin";
+
   public void testStrings() throws Exception {
     JavaBinCodec javabin = new JavaBinCodec();
-    for (int i = 0; i < 10000*RANDOM_MULTIPLIER; i++) {
+    for (int i = 0; i < 10000 * RANDOM_MULTIPLIER; i++) {
       String s = TestUtil.randomUnicodeString(random());
       ByteArrayOutputStream os = new ByteArrayOutputStream();
       javabin.marshal(s, os);
@@ -36,4 +56,158 @@ public class TestJavaBinCodec extends LuceneTestCase {
       assertEquals(s, o);
     }
   }
+
+  private List<Object> generateAllDataTypes() {
+    List<Object> types = new ArrayList<>();
+
+    types.add(null); //NULL
+    types.add(true);
+    types.add(false);
+    types.add((byte) 1);
+    types.add((short) 2);
+    types.add((double) 3);
+
+    types.add(-4);
+    types.add(4);
+    types.add(42);
+
+    types.add((long) -5);
+    types.add((long) 5);
+    types.add((long) 50);
+
+    types.add((float) 6);
+    types.add(new Date(0));
+
+    Map<Integer, Integer> map = new HashMap<>();
+    map.put(1, 2);
+    types.add(map);
+
+    SolrDocument doc = new SolrDocument();
+    doc.addField("foo", "bar");
+    types.add(doc);
+
+    SolrDocumentList solrDocs = new SolrDocumentList();
+    solrDocs.setMaxScore(1.0f);
+    solrDocs.setNumFound(1);
+    solrDocs.setStart(0);
+    solrDocs.add(0, doc);
+    types.add(solrDocs);
+
+    types.add(new byte[] {1, 2, 3, 4, 5});
+
+    // TODO?
+    // List list = new ArrayList();
+    // list.add("one");
+    // types.add(list.iterator());
+
+    types.add((byte) 15); //END
+
+    SolrInputDocument idoc = new SolrInputDocument();
+    idoc.addField("foo", "bar");
+    types.add(idoc);
+
+    SolrInputDocument parentDoc = new SolrInputDocument();
+    parentDoc.addField("foo", "bar");
+    SolrInputDocument childDoc = new SolrInputDocument();
+    childDoc.addField("foo", "bar");
+    parentDoc.addChildDocument(childDoc);
+    types.add(parentDoc);
+
+    types.add(new EnumFieldValue(1, "foo"));
+
+    types.add(map.entrySet().iterator().next()); //Map.Entry
+
+    types.add((byte) (1 << 5)); //TAG_AND_LEN
+
+    types.add("foo");
+    types.add(1);
+    types.add((long) 2);
+
+    SimpleOrderedMap<Object> simpleOrderedMap = new SimpleOrderedMap<>();
+    simpleOrderedMap.add("bar", "barbar");
+    types.add(simpleOrderedMap);
+
+    NamedList<String> nl = new NamedList<>();
+    nl.add("foo", "barbar");
+    types.add(nl);
+
+    return types;
+  }
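generateAllDataTypes() doubles as a catalogue of every value shape the codec is expected to round-trip, and the back-compat file is simply this list marshalled once and committed. The basic round trip the tests below rely on looks like this (a standalone sketch using the public JavaBinCodec API; the field values are illustrative):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;

    import org.apache.solr.common.SolrDocument;
    import org.apache.solr.common.util.JavaBinCodec;

    public class JavaBinRoundTripDemo {
      public static void main(String[] args) throws Exception {
        SolrDocument doc = new SolrDocument();
        doc.addField("id", "42");

        // marshal: object graph -> binary form; the first byte written
        // is the JavaBin version, which testForwardCompat skips over.
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        new JavaBinCodec().marshal(doc, os);
        byte[] bytes = os.toByteArray();

        // unmarshal: binary form -> object graph; a fresh codec per
        // operation avoids any shared stream state.
        Object roundTripped = new JavaBinCodec().unmarshal(new ByteArrayInputStream(bytes));
        System.out.println(roundTripped); // the same SolrDocument content
      }
    }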
+
+  @Test
+  public void testBackCompat() {
+    JavaBinCodec javabin = new JavaBinCodec() {
+      @Override
+      public List<Object> readIterator(DataInputInputStream fis) throws IOException {
+        return super.readIterator(fis);
+      }
+    };
+    try {
+      InputStream is = getClass().getResourceAsStream(SOLRJ_JAVABIN_BACKCOMPAT_BIN);
+      List<Object> unmarshaledObj = (List<Object>) javabin.unmarshal(is);
+      List<Object> matchObj = generateAllDataTypes();
+
+      assertEquals(unmarshaledObj.size(), matchObj.size());
+      for (int i = 0; i < unmarshaledObj.size(); i++) {
+        if (unmarshaledObj.get(i) instanceof byte[] && matchObj.get(i) instanceof byte[]) {
+          // byte arrays use identity equals, so compare their contents instead
+          byte[] b1 = (byte[]) unmarshaledObj.get(i);
+          byte[] b2 = (byte[]) matchObj.get(i);
+          assertTrue(Arrays.equals(b1, b2));
+        } else {
+          assertEquals(unmarshaledObj.get(i), matchObj.get(i));
+        }
+      }
+    } catch (IOException e) {
+      fail(e.getMessage());
+    }
+  }
+
+  @Test
+  public void testForwardCompat() {
+    JavaBinCodec javabin = new JavaBinCodec();
+    ByteArrayOutputStream os = new ByteArrayOutputStream();
+
+    Object data = generateAllDataTypes();
+    try {
+      javabin.marshal(data, os);
+      byte[] newFormatBytes = os.toByteArray();
+
+      InputStream is = getClass().getResourceAsStream(SOLRJ_JAVABIN_BACKCOMPAT_BIN);
+      byte[] currentFormatBytes = IOUtils.toByteArray(is);
+
+      assertEquals(currentFormatBytes.length, newFormatBytes.length);
+      for (int i = 1; i < currentFormatBytes.length; i++) { // ignore the first byte: it is version information
+        assertEquals(currentFormatBytes[i], newFormatBytes[i]);
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+      fail(e.getMessage());
+    }
+  }
+
+  public void genBinaryFile() throws IOException {
+    JavaBinCodec javabin = new JavaBinCodec();
+    ByteArrayOutputStream os = new ByteArrayOutputStream();
+
+    Object data = generateAllDataTypes();
+    javabin.marshal(data, os);
+    byte[] out = os.toByteArray();
+    FileOutputStream fs = new FileOutputStream(new File(BIN_FILE_LOCATION));
+    BufferedOutputStream bos = new BufferedOutputStream(fs);
+    bos.write(out);
+    bos.close();
+  }
+
+  public static void main(String[] args) throws IOException {
+    TestJavaBinCodec test = new TestJavaBinCodec();
+    test.genBinaryFile();
+  }
+
 }