LUCENE-5487: merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5487@1576473 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2014-03-11 19:03:45 +00:00
commit 2d88332e9d
55 changed files with 1065 additions and 161 deletions

View File

@ -18,6 +18,12 @@
-->
<project name="lucene-solr" default="-projecthelp" basedir=".">
<!-- Look for property definition in various *build.properties files -->
<property file="${user.home}/lucene.build.properties"/>
<property file="${user.home}/build.properties"/>
<property file="${basedir}/build.properties"/>
<property file="lucene/build.properties"/><!-- hack for Lucene users, clones Lucene's common-build.xml -->
<target name="-projecthelp">
<java fork="false" classname="org.apache.tools.ant.Main" taskname="-">
<arg value="-projecthelp"/>
@ -268,10 +274,6 @@
</target>
<target name="idea" depends="resolve" description="Setup IntelliJ IDEA configuration">
<!-- Look for property definition for ${idea.jdk} in various *build.properties files -->
<property file="lucene/build.properties"/> <!-- Look in the current project first -->
<property file="${user.home}/lucene.build.properties"/>
<property file="${user.home}/build.properties"/>
<condition property="idea.jdk.is.set">
<isset property="idea.jdk"/>
</condition>

View File

@ -92,7 +92,7 @@
<svn-checker failonmodifications="true"/>
</target>
<property name="svnkit.version" value="1.7.8"/>
<property name="svnkit.version" value="1.8.4"/>
<macrodef xmlns:ivy="antlib:org.apache.ivy.ant" name="svn-checker">
<attribute name="failonmodifications" default="true"/> <!-- false if file modifications are allowed -->
@ -107,8 +107,6 @@
import org.tmatesoft.svn.core.wc.*;
import org.apache.tools.ant.Project;
def RECOMMENDED_SVNKIT_18 = '1.8.2';
SVNClientManager manager = SVNClientManager.newInstance();
SVNStatusClient statusClient = manager.getStatusClient();
SVNWCClient wcClient = manager.getWCClient();
@ -124,11 +122,7 @@
def ec = ex.getErrorMessage().getErrorCode();
int code = ec.getCode();
int category = ec.getCategory();
if (code == SVNErrorCode.WC_UNSUPPORTED_FORMAT.getCode()) {
task.log('WARNING: Unsupported SVN working copy version! Disabling checks...', Project.MSG_WARN);
task.log('If your working copy is on version 1.8 already, please pass -Dsvnkit.version=' + RECOMMENDED_SVNKIT_18 + ' to successfully run checks.', Project.MSG_INFO);
return;
} else if (code == SVNErrorCode.WC_NOT_DIRECTORY.getCode() || code == SVNErrorCode.WC_NOT_FILE.getCode()) {
if (code == SVNErrorCode.WC_NOT_DIRECTORY.getCode() || code == SVNErrorCode.WC_NOT_FILE.getCode()) {
task.log('WARNING: Development directory is not an SVN checkout! Disabling checks...', Project.MSG_WARN);
return;
} else if (category == SVNErrorCode.WC_CATEGORY) {

View File

@ -147,15 +147,33 @@ Bug fixes
recursive affix application are driven correctly by continuation classes in the affix file.
(Robert Muir)
* LUCENE-5497: HunspellStemFilter properly handles escaped terms and affixes without conditions.
(Robert Muir)
* LUCENE-5505: HunspellStemFilter ignores BOM markers in dictionaries and handles varying
types of whitespace in SET/FLAG commands. (Robert Muir)
* LUCENE-5507: Fix HunspellStemFilter loading of dictionaries with large amounts of aliases
etc before the encoding declaration. (Robert Muir)
* LUCENE-5502: Fixed TermsFilter.equals that could return true for different
filters. (Igor Motov via Adrien Grand)
Test Framework
* LUCENE-5449: Rename _TestUtil and _TestHelper to remove the leading _.
* LUCENE-5501: Added random out-of-order collection testing (when the collector
supports it) to AssertingIndexSearcher. (Adrien Grand)
Build
* LUCENE-5463: RamUsageEstimator.(human)sizeOf(Object) is now a forbidden API.
(Adrien Grand, Robert Muir)
* LUCENE-5511: "ant precommit" / "ant check-svn-working-copy" now work again
with any working copy format (thanks to svnkit 1.8.4). (Uwe Schindler)
======================= Lucene 4.7.0 =======================
New Features
@ -188,7 +206,7 @@ New Features
AnalyzingInfixSuggester but boosts suggestions that matched tokens
with lower positions. (Remi Melisson via Mike McCandless)
* LUCENE-4399: When sorting by String (SortField.STRING), you can now
* LUCENE-5399: When sorting by String (SortField.STRING), you can now
specify whether missing values should be sorted first (the default),
using SortField.setMissingValue(SortField.STRING_FIRST), or last,
using SortField.setMissingValue(SortField.STRING_LAST). (Rob Muir,

View File

@ -35,12 +35,16 @@ import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
@ -54,6 +58,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
@ -154,21 +159,41 @@ public class Dictionary {
this.ignoreCase = ignoreCase;
this.needsInputCleaning = ignoreCase;
this.needsOutputCleaning = false; // set if we have an OCONV
// TODO: we really need to probably buffer this on disk since so many newer dictionaries
// (en_GB, hu_HU, etc) now have tons of AM lines (morph metadata) etc before they finally declare
// their encoding... but for now this large buffer is a workaround
BufferedInputStream buffered = new BufferedInputStream(affix, 65536);
buffered.mark(65536);
String encoding = getDictionaryEncoding(buffered);
buffered.reset();
CharsetDecoder decoder = getJavaEncoding(encoding);
readAffixFile(buffered, decoder);
flagLookup.add(new BytesRef()); // no flags -> ord 0
stripLookup.add(new BytesRef()); // no strip -> ord 0
IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
readDictionaryFiles(dictionaries, decoder, b);
words = b.finish();
File aff = File.createTempFile("affix", "aff", tempDir);
OutputStream out = new BufferedOutputStream(new FileOutputStream(aff));
InputStream aff1 = null;
InputStream aff2 = null;
try {
// copy contents of affix stream to temp file
final byte [] buffer = new byte [1024 * 8];
int len;
while ((len = affix.read(buffer)) > 0) {
out.write(buffer, 0, len);
}
out.close();
// pass 1: get encoding
aff1 = new BufferedInputStream(new FileInputStream(aff));
String encoding = getDictionaryEncoding(aff1);
// pass 2: parse affixes
CharsetDecoder decoder = getJavaEncoding(encoding);
aff2 = new BufferedInputStream(new FileInputStream(aff));
readAffixFile(aff2, decoder);
// read dictionary entries
IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
readDictionaryFiles(dictionaries, decoder, b);
words = b.finish();
aliases = null; // no longer needed
} finally {
IOUtils.closeWhileHandlingException(out, aff1, aff2);
aff.delete();
}
}
/**
@ -251,6 +276,10 @@ public class Dictionary {
LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
String line = null;
while ((line = reader.readLine()) != null) {
// ignore any BOM marker on first line
if (reader.getLineNumber() == 1 && line.startsWith("\uFEFF")) {
line = line.substring(1);
}
if (line.startsWith(ALIAS_KEY)) {
parseAlias(line);
} else if (line.startsWith(PREFIX_KEY)) {
@ -348,8 +377,10 @@ public class Dictionary {
String line = reader.readLine();
String ruleArgs[] = line.split("\\s+");
if (ruleArgs.length < 5) {
throw new ParseException("The affix file contains a rule with less than five elements", reader.getLineNumber());
// from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
// condition is optional
if (ruleArgs.length < 4) {
throw new ParseException("The affix file contains a rule with less than four elements: " + line, reader.getLineNumber());
}
char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
@ -370,7 +401,7 @@ public class Dictionary {
Arrays.sort(appendFlags);
}
String condition = ruleArgs[4];
String condition = ruleArgs.length > 4 ? ruleArgs[4] : ".";
// at least the gascon affix file has this issue
if (condition.startsWith("[") && !condition.endsWith("]")) {
condition = condition + "]";
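Per the hunspell manpage quoted above, the condition is the optional fifth element; defaulting it to "." makes a four-element rule match any stem. The optional-condition.aff fixture added later in this commit exercises exactly this path with the four-element rule `PFX A 0 re`, which is why TestOptionalCondition expects "rework" to stem to "work".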
@ -464,6 +495,9 @@ public class Dictionary {
return builder.finish();
}
/** pattern accepts optional BOM + SET + any whitespace */
final static Pattern ENCODING_PATTERN = Pattern.compile("^(\u00EF\u00BB\u00BF)?SET\\s+");
/**
* Parses the encoding specified in the affix file readable through the provided InputStream
@ -473,7 +507,7 @@ public class Dictionary {
* @throws IOException Can be thrown while reading from the InputStream
* @throws ParseException Thrown if the first non-empty non-comment line read from the file does not adhere to the format {@code SET <encoding>}
*/
private String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
static String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
final StringBuilder encoding = new StringBuilder();
for (;;) {
encoding.setLength(0);
@ -496,9 +530,10 @@ public class Dictionary {
}
continue;
}
if (encoding.length() > 4 && "SET ".equals(encoding.substring(0, 4))) {
// cleanup the encoding string, too (whitespace)
return encoding.substring(4).trim();
Matcher matcher = ENCODING_PATTERN.matcher(encoding);
if (matcher.find()) {
int last = matcher.end();
return encoding.substring(last).trim();
}
}
}
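
A note on the pattern above: getDictionaryEncoding scans the affix header byte-by-byte before any charset is known, so a UTF-8 byte order mark arrives as the three raw bytes 0xEF 0xBB 0xBF rather than as the single char U+FEFF; that is what the literal \u00EF\u00BB\u00BF in ENCODING_PATTERN tolerates. A minimal standalone sketch (class name hypothetical):

import java.nio.charset.StandardCharsets;

public class BomBytesDemo {
  public static void main(String[] args) {
    byte[] bom = "\uFEFF".getBytes(StandardCharsets.UTF_8); // EF BB BF
    StringBuilder raw = new StringBuilder();
    for (byte b : bom) {
      raw.append((char) (b & 0xFF)); // widen each raw byte to a char
    }
    // the prefix that ENCODING_PATTERN accepts before "SET"
    System.out.println(raw.toString().equals("\u00EF\u00BB\u00BF")); // true
  }
}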
@ -536,8 +571,12 @@ public class Dictionary {
* @param flagLine Line containing the flag information
* @return FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition
*/
private FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
String flagType = flagLine.substring(5);
static FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
String parts[] = flagLine.split("\\s+");
if (parts.length != 2) {
throw new IllegalArgumentException("Illegal FLAG specification: " + flagLine);
}
String flagType = parts[1];
if (NUM_FLAG_TYPE.equals(flagType)) {
return new NumFlagParsingStrategy();
@ -550,6 +589,24 @@ public class Dictionary {
throw new IllegalArgumentException("Unknown flag type: " + flagType);
}
final char FLAG_SEPARATOR = 0x1f; // flag separator after escaping
String unescapeEntry(String entry) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < entry.length(); i++) {
char ch = entry.charAt(i);
if (ch == '\\' && i+1 < entry.length()) {
sb.append(entry.charAt(i+1));
i++;
} else if (ch == '/') {
sb.append(FLAG_SEPARATOR);
} else {
sb.append(ch);
}
}
return sb.toString();
}
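
A runnable sketch (class name hypothetical) of what unescapeEntry produces for the `R2\/D2/A` entry from the escaped.dic fixture added in this commit: escaped slashes stay literal characters, while the bare '/' that introduces flags becomes the internal 0x1f separator.

public class UnescapeDemo {
  static final char FLAG_SEPARATOR = 0x1f;

  // copy of the unescapeEntry logic above, reproduced so the demo runs standalone
  static String unescapeEntry(String entry) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < entry.length(); i++) {
      char ch = entry.charAt(i);
      if (ch == '\\' && i + 1 < entry.length()) {
        sb.append(entry.charAt(i + 1));
        i++;
      } else if (ch == '/') {
        sb.append(FLAG_SEPARATOR);
      } else {
        sb.append(ch);
      }
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    String out = unescapeEntry("R2\\/D2/A"); // the raw dictionary line R2\/D2/A
    System.out.println(out.equals("R2/D2" + FLAG_SEPARATOR + "A")); // true
  }
}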
/**
* Reads the dictionary file through the provided InputStreams, building up the words map
*
@ -570,8 +627,9 @@ public class Dictionary {
String line = lines.readLine(); // first line is number of entries (approximately, sometimes)
while ((line = lines.readLine()) != null) {
line = unescapeEntry(line);
if (needsInputCleaning) {
int flagSep = line.lastIndexOf('/');
int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
if (flagSep == -1) {
CharSequence cleansed = cleanInput(line, sb);
writer.write(cleansed.toString().getBytes(IOUtils.CHARSET_UTF_8));
@ -604,7 +662,7 @@ public class Dictionary {
scratch1.length = o1.length;
for (int i = scratch1.length - 1; i >= 0; i--) {
if (scratch1.bytes[scratch1.offset + i] == '/') {
if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR) {
scratch1.length = i;
break;
}
@ -615,7 +673,7 @@ public class Dictionary {
scratch2.length = o2.length;
for (int i = scratch2.length - 1; i >= 0; i--) {
if (scratch2.bytes[scratch2.offset + i] == '/') {
if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR) {
scratch2.length = i;
break;
}
@ -648,7 +706,7 @@ public class Dictionary {
String entry;
char wordForm[];
int flagSep = line.lastIndexOf('/');
int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
if (flagSep == -1) {
wordForm = NOFLAGS;
entry = line;
@ -738,7 +796,9 @@ public class Dictionary {
final int count = Integer.parseInt(ruleArgs[1]);
aliases = new String[count];
} else {
aliases[aliasCount++] = ruleArgs[1];
// an alias can map to no flags
String aliasValue = ruleArgs.length == 1 ? "" : ruleArgs[1];
aliases[aliasCount++] = aliasValue;
}
}
@ -753,7 +813,7 @@ public class Dictionary {
/**
* Abstraction of the process of parsing flags taken from the affix and dic files
*/
private static abstract class FlagParsingStrategy {
static abstract class FlagParsingStrategy {
/**
* Parses the given String into a single flag
@ -828,6 +888,9 @@ public class Dictionary {
}
StringBuilder builder = new StringBuilder();
if (rawFlags.length() % 2 == 1) {
throw new IllegalArgumentException("Invalid flags (should be even number of characters): " + rawFlags);
}
for (int i = 0; i < rawFlags.length(); i+=2) {
char cookedFlag = (char) ((int) rawFlags.charAt(i) + (int) rawFlags.charAt(i + 1));
builder.append(cookedFlag);
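
The pair-summing above appears to serve the `FLAG long` decoding: each flag is stored as two raw chars whose code points are added into a single cooked char, hence the new even-length check. A standalone sketch (class name hypothetical):

public class LongFlagDemo {
  public static void main(String[] args) {
    String rawFlags = "AABB"; // two "long" flags: AA and BB
    StringBuilder cooked = new StringBuilder();
    for (int i = 0; i < rawFlags.length(); i += 2) {
      // same arithmetic as above: sum each pair into one cooked char
      cooked.append((char) (rawFlags.charAt(i) + rawFlags.charAt(i + 1)));
    }
    // 'A'+'A' = 130 and 'B'+'B' = 132; note the sum is order-insensitive,
    // so "AB" and "BA" would cook to the same flag
    System.out.println((int) cooked.charAt(0) + " " + (int) cooked.charAt(1)); // 130 132
  }
}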

View File

@ -55,7 +55,7 @@ abstract class StemmerTestBase extends LuceneTestCase {
}
try {
Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), true);
Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), ignoreCase);
stemmer = new Stemmer(dictionary);
} finally {
IOUtils.closeWhileHandlingException(affixStream);

View File

@ -47,8 +47,8 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi", "dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff",
"albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff",
"amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff",
//BUG! "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
//BUG! "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
"arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
"armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
"azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff",
"belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff",
"belarusian_dictionary-0.1.2-fx+sm+tb.xpi", "dictionaries/be.dic", "dictionaries/be.aff",
@ -72,13 +72,13 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi", "dictionaries/es_AR.dic", "dictionaries/es_AR.aff",
"diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi", "dictionaries/es_MX.dic", "dictionaries/es_MX.aff",
"diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi", "dictionaries/roa-ES-val.dic", "dictionaries/roa-ES-val.aff",
//BUG! "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
"diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic.dic", "dictionaries/fr-classic.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-modern.dic", "dictionaries/fr-modern.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-reform.dic", "dictionaries/fr-reform.aff",
"difazier_an_drouizig-0.12-tb+sm+fx.xpi", "dictionaries/br.dic", "dictionaries/br.aff",
//BUG! "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
"dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
"dizionari_furlan-3.1-tb+fx+sm.xpi", "dictionaries/fur-IT.dic", "dictionaries/fur-IT.aff",
"dizionario_italiano-3.3.2-fx+sm+tb.xpi", "dictionaries/it_IT.dic", "dictionaries/it_IT.aff",
"eesti_keele_speller-3.2-fx+tb+sm.xpi", "dictionaries/et-EE.dic", "dictionaries/et-EE.aff",
@ -101,10 +101,10 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff",
"hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff",
"hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff",
//BUG! "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu_HU.dic", "dictionaries/hu_HU.aff",
//BUG! "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
"hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu.dic", "dictionaries/hu.aff",
//BUG: has no encoding declaration "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
"kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff",
//BUG! "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
"kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
"kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi", "dictionaries/Kaszebsczi.dic", "dictionaries/Kaszebsczi.aff",
"kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi", "dictionaries/sw_TZ.dic", "dictionaries/sw_TZ.aff",
"kurdish_spell_checker-0.96-fx+tb+sm.xpi", "dictionaries/ku-TR.dic", "dictionaries/ku-TR.aff",
@ -113,8 +113,8 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi", "dictionaries/lt.dic", "dictionaries/lt.aff",
"litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi", "dictionaries/ga.dic", "dictionaries/ga.aff",
"litreoir_na_liongailise-0.03-fx+sm+tb.xpi", "dictionaries/ln-CD.dic", "dictionaries/ln-CD.aff",
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
"macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
"macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
"malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/mg_MG.dic", "dictionaries/mg_MG.aff",
"marathi_dictionary-9.3-sm+tb+sb+fx.xpi", "dictionaries/mr-IN.dic", "dictionaries/mr-IN.aff",
"ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nr-ZA.dic", "dictionaries/nr-ZA.aff",
@ -125,8 +125,8 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/or-IN.dic", "dictionaries/or-IN.aff",
"polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi", "dictionaries/pl.dic", "dictionaries/pl.aff",
"punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi", "dictionaries/pa-IN.dic", "dictionaries/pa-IN.aff",
//BUG! "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
//BUG! "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
"romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
"russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
"sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi", "dictionaries/sa_IN.dic", "dictionaries/sa_IN.aff",
"scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi", "dictionaries/gd-GB.dic", "dictionaries/gd-GB.aff",
"serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Cyrl.dic", "dictionaries/sr-RS-Cyrl.aff",
@ -146,22 +146,22 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"telugu_spell_checker-0.3-tb+fx+sm.xpi", "dictionaries/te_IN.dic", "dictionaries/te_IN.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi-x-Tai Tokerau.dic", "dictionaries/mi-x-Tai Tokerau.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi.dic", "dictionaries/mi.aff",
//BUG! "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
//BUG: broken file (hunspell refuses to load, too) "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
"tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff",
"tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff",
"turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
//BUG! "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
"turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
"ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff",
"united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff",
"upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi", "dictionaries/hsb.dic", "dictionaries/hsb.aff",
//BUG! "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
"urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
"uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/uz.dic", "dictionaries/uz.aff",
"valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi", "dictionaries/ca-ES-valencia.dic", "dictionaries/ca-ES-valencia.aff",
"venda_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/ve-ZA.dic", "dictionaries/ve-ZA.aff",
"verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.dic", "dictionaries/vi-DauCu.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff",
//BUG! "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
"woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
"xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff",
"xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff",
"yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff",
@ -196,7 +196,7 @@ public class TestAllDictionaries2 extends LuceneTestCase {
}
public void testOneDictionary() throws Exception {
String toTest = "hausa_spelling_dictionary-0.2-tb+fx.xpi";
String toTest = "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi";
for (int i = 0; i < tests.length; i++) {
if (tests[i].equals(toTest)) {
File f = new File(DICTIONARY_HOME, tests[i]);
@ -210,7 +210,7 @@ public class TestAllDictionaries2 extends LuceneTestCase {
try (InputStream dictionary = zip.getInputStream(dicEntry);
InputStream affix = zip.getInputStream(affEntry)) {
new Dictionary(affix, dictionary);
new Dictionary(affix, dictionary);
}
}
}

View File

@ -17,6 +17,7 @@ package org.apache.lucene.analysis.hunspell;
* limitations under the License.
*/
import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
@ -24,6 +25,7 @@ import java.text.ParseException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.fst.Builder;
@ -77,6 +79,40 @@ public class TestDictionary extends LuceneTestCase {
affixStream.close();
dictStream.close();
}
public void testCompressedBeforeSetDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
Dictionary dictionary = new Dictionary(affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char flags[] = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
affixStream.close();
dictStream.close();
}
public void testCompressedEmptyAliasDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed-empty-alias.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");
Dictionary dictionary = new Dictionary(affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char flags[] = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);
affixStream.close();
dictStream.close();
}
// malformed rule causes ParseException
public void testInvalidData() throws Exception {
@ -87,7 +123,7 @@ public class TestDictionary extends LuceneTestCase {
new Dictionary(affixStream, dictStream);
fail("didn't get expected exception");
} catch (ParseException expected) {
assertEquals("The affix file contains a rule with less than five elements", expected.getMessage());
assertTrue(expected.getMessage().startsWith("The affix file contains a rule with less than four elements"));
assertEquals(24, expected.getErrorOffset());
}
@ -178,4 +214,16 @@ public class TestDictionary extends LuceneTestCase {
Dictionary.applyMappings(fst, sb);
assertEquals("ghghghde", sb.toString());
}
public void testSetWithCrazyWhitespaceAndBOMs() throws Exception {
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\t UTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\r\n".getBytes(IOUtils.CHARSET_UTF_8))));
}
public void testFlagWithCrazyWhitespace() throws Exception {
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG\tUTF-8"));
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG UTF-8"));
}
}
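
These direct calls are possible because the patch widens getDictionaryEncoding and getFlagParsingStrategy from private to package-private static, as seen in the Dictionary.java hunks above.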

View File

@ -0,0 +1,36 @@
package org.apache.lucene.analysis.hunspell;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.junit.BeforeClass;
public class TestEscaped extends StemmerTestBase {
@BeforeClass
public static void beforeClass() throws Exception {
init("escaped.aff", "escaped.dic");
}
public void testStemming() {
assertStemsTo("works", "work");
assertStemsTo("work", "work");
assertStemsTo("R2/D2", "R2/D2");
assertStemsTo("R2/D2s", "R2/D2");
assertStemsTo("N/A", "N/A");
assertStemsTo("N/As");
}
}

View File

@ -31,6 +31,7 @@ import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -39,9 +40,13 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
// no multiple try-with to workaround bogus VerifyError
InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
try {
dictionary = new Dictionary(affixStream, dictStream);
} finally {
IOUtils.closeWhileHandlingException(affixStream, dictStream);
}
}
@ -97,9 +102,13 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {
public void testIgnoreCaseNoSideEffects() throws Exception {
final Dictionary d;
try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
// no multiple try-with to workaround bogus VerifyError
InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
try {
d = new Dictionary(affixStream, Collections.singletonList(dictStream), true);
} finally {
IOUtils.closeWhileHandlingException(affixStream, dictStream);
}
Analyzer a = new Analyzer() {
@Override

View File

@ -0,0 +1,42 @@
package org.apache.lucene.analysis.hunspell;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.junit.BeforeClass;
public class TestOptionalCondition extends StemmerTestBase {
@BeforeClass
public static void beforeClass() throws Exception {
init("optional-condition.aff", "condition.dic");
}
public void testStemming() {
assertStemsTo("hello", "hello");
assertStemsTo("try", "try");
assertStemsTo("tried", "try");
assertStemsTo("work", "work");
assertStemsTo("worked", "work");
assertStemsTo("rework", "work");
assertStemsTo("reworked", "work");
assertStemsTo("retried");
assertStemsTo("workied");
assertStemsTo("tryed");
assertStemsTo("tryied");
assertStemsTo("helloed");
}
}

View File

@ -19,6 +19,6 @@ SFX E 0 d o
PFX B Y 1
PFX B 0 s o
#wrong rule (only 4 elements)
#wrong rule (only 3 elements)
PFX A0 Y 1
PFX A0 0 a
PFX A0 0

View File

@ -0,0 +1,29 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
FLAG long
AF 5
AF AA
AF BB
AF CC
AF DD
AF EE
SFX AA Y 3
SFX AA 0 e n
SFX AA 0 e t
SFX AA 0 e h
SFX CC Y 2
SFX CC 0 d/3 c
SFX CC 0 c b
SFX DD Y 1
SFX DD 0 s o
SFX EE Y 1
SFX EE 0 d o
PFX BB Y 1
PFX BB 0 s o

View File

@ -0,0 +1,30 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
FLAG long
AF 6
AF AA
AF BB
AF CC
AF DD
AF EE
AF
SFX AA Y 3
SFX AA 0 e n
SFX AA 0 e t
SFX AA 0 e h
SFX CC Y 2
SFX CC 0 d/3 c
SFX CC 0 c b
SFX DD Y 1
SFX DD 0 s o
SFX EE Y 1
SFX EE 0 d o
PFX BB Y 1
PFX BB 0 s o

View File

@ -1,8 +1,3 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
FLAG long
AF 5
AF AA
AF BB
@ -10,6 +5,11 @@ AF CC
AF DD
AF EE
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
FLAG long
SFX AA Y 3
SFX AA 0 e n
SFX AA 0 e t

View File

@ -0,0 +1,4 @@
SET UTF-8
SFX A Y 1
SFX A 0 s . +PLUR

View File

@ -0,0 +1,4 @@
3
work/A
R2\/D2/A
N\/A

View File

@ -0,0 +1,14 @@
SET UTF-8
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ
REP 2
REP f ph
REP ph f
# has no condition
PFX A Y 1
PFX A 0 re
SFX B Y 2
SFX B 0 ed [^y]
SFX B y ied y

View File

@ -19,7 +19,7 @@ package org.apache.lucene.search;
import java.util.Collection;
/** Used by {@link BulkScorers} that need to pass a {@link
/** Used by {@link BulkScorer}s that need to pass a {@link
* Scorer} to {@link Collector#setScorer}. */
final class FakeScorer extends Scorer {
float score;

View File

@ -26,12 +26,13 @@ import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Bits.MatchNoBits;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
@ -121,8 +122,11 @@ public class TestReuseDocsEnum extends LuceneTestCase {
public void testReuseDocsEnumDifferentReader() throws IOException {
Directory dir = newDirectory();
Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(cp));
int numdocs = atLeast(20);
createRandomIndex(numdocs, writer, random());
writer.commit();
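
The two analyzer lines above are an idiom repeated across most of the test files that follow. The apparent intent, inferred from the diff rather than stated in it: cap MockAnalyzer's token length at a random value no larger than IndexWriter.MAX_TERM_LENGTH, so that randomly generated or line-file tokens can never exceed the longest term IndexWriter will accept:

MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));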

View File

@ -139,8 +139,11 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
mp.setUseCompoundFile(false);
mp.setNoCFSRatio(1.0);
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
// TODO: remove randomness
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
.setMergePolicy(mp);
conf.setCodec(Codec.forName("Lucene40"));
IndexWriter writer = new IndexWriter(dir, conf);

View File

@ -30,6 +30,7 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
/**
*
@ -41,8 +42,11 @@ public class TestCustomNorms extends LuceneTestCase {
public void testFloatNorms() throws IOException {
Directory dir = newDirectory();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random()));
analyzer);
Similarity provider = new MySimProvider();
config.setSimilarity(provider);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

View File

@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
@ -63,9 +62,12 @@ public class TestDuelingCodecs extends LuceneTestCase {
long seed = random().nextLong();
// must use same seed because of random payloads, etc
Analyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
Analyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
int maxTermLength = TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH);
MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
leftAnalyzer.setMaxTokenLength(maxTermLength);
MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
rightAnalyzer.setMaxTokenLength(maxTermLength);
// but these can be different
// TODO: this turns this into a really big test of Multi*, is that what we want?
IndexWriterConfig leftConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer);

View File

@ -29,6 +29,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@ -64,8 +65,11 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
Directory dir = newDirectory();
MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setFlushPolicy(flushPolicy);
analyzer).setFlushPolicy(flushPolicy);
final int numDWPT = 1 + atLeast(2);
DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(
numDWPT);

View File

@ -54,7 +54,10 @@ public class TestForceMergeForever extends LuceneTestCase {
public void test() throws Exception {
final Directory d = newDirectory();
final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
// Try to make an index that requires merging:
w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));

View File

@ -51,7 +51,9 @@ public class TestIndexWriterOutOfFileDescriptors extends LuceneTestCase {
System.out.println("TEST: iter=" + iter);
}
try {
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
if (VERBOSE) {
// Do this ourselves instead of relying on LTC so

View File

@ -548,7 +548,10 @@ public class TestIndexWriterWithThreads extends LuceneTestCase {
final int threadCount = TestUtil.nextInt(random(), 2, 6);
final AtomicReference<IndexWriter> writerRef = new AtomicReference<IndexWriter>();
writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)));
final LineFileDocs docs = new LineFileDocs(random());
final Thread[] threads = new Thread[threadCount];
final int iters = atLeast(100);

View File

@ -75,7 +75,10 @@ public class TestNorms extends LuceneTestCase {
// LUCENE-1260
public void testCustomEncoder() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
config.setSimilarity(new CustomNormEncodingSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
Document doc = new Document();

View File

@ -46,7 +46,10 @@ public class TestRollingUpdates extends LuceneTestCase {
Codec.setDefault(TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
}
final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
final int SIZE = atLeast(20);
int id = 0;
IndexReader r = null;

View File

@ -44,7 +44,9 @@ public class TestTermsEnum extends LuceneTestCase {
Random random = new Random(random().nextLong());
final LineFileDocs docs = new LineFileDocs(random, true);
final Directory d = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), d);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final RandomIndexWriter w = new RandomIndexWriter(random(), d, analyzer);
final int numDocs = atLeast(10);
for(int docCount=0;docCount<numDocs;docCount++) {
w.addDocument(docs.nextDoc());

View File

@ -24,8 +24,10 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@ -36,13 +38,14 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
public class TestSameScoresWithThreads extends LuceneTestCase {
public void test() throws Exception {
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
LineFileDocs docs = new LineFileDocs(random());
int charsToIndex = atLeast(100000);
int charsIndexed = 0;

View File

@ -38,14 +38,15 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
public class TestNRTCachingDirectory extends LuceneTestCase {
public void testNRTAndCommit() throws Exception {
Directory dir = newDirectory();
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
final LineFileDocs docs = new LineFileDocs(random(), true);
final int numDocs = TestUtil.nextInt(random(), 100, 400);

View File

@ -292,7 +292,10 @@ public class TestFSTs extends LuceneTestCase {
final LineFileDocs docs = new LineFileDocs(random(), true);
final int RUN_TIME_MSEC = atLeast(500);
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = TestUtil.getTempDir("fstlines");
final Directory dir = newFSDirectory(tempDir);
final IndexWriter writer = new IndexWriter(dir, conf);

View File

@ -128,7 +128,7 @@ public abstract class AbstractAllGroupHeadsCollector<GH extends AbstractAllGroup
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
return false;
}
/**

View File

@ -380,10 +380,7 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
System.out.println("\n===================================================================================");
}
assertEquals(expectedGroupHeads.length, actualGroupHeads.length);
for (int i = 0; i < expectedGroupHeads.length; i++) {
assertEquals(expectedGroupHeads[i], actualGroupHeads[i]);
}
assertArrayEquals(expectedGroupHeads, actualGroupHeads);
}
} finally {
QueryUtils.purgeFieldCache(r);

View File

@ -436,6 +436,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
for (int i = 0; i < numDocs; i++) {
Directory dir = newDirectory();
MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), TEST_VERSION_CURRENT, mockAnalyzer));
Document nextDoc = lineFileDocs.nextDoc();
Document doc = new Document();

View File

@ -222,24 +222,14 @@ public final class TermsFilter extends Filter {
}
TermsFilter test = (TermsFilter) obj;
if (test.hashCode == hashCode && this.termsAndFields.length == test.termsAndFields.length) {
// first check the fields before even comparing the bytes
for (int i = 0; i < termsAndFields.length; i++) {
TermsAndField current = termsAndFields[i];
if (!current.equals(test.termsAndFields[i])) {
return false;
}
// first check the fields before even comparing the bytes
if (test.hashCode == hashCode && Arrays.equals(termsAndFields, test.termsAndFields)) {
int lastOffset = termsAndFields[termsAndFields.length - 1].end;
// compare offsets since we sort they must be identical
if (ArrayUtil.equals(offsets, 0, test.offsets, 0, lastOffset + 1)) {
// straight byte comparison since we sort they must be identical
return ArrayUtil.equals(termsBytes, 0, test.termsBytes, 0, offsets[lastOffset]);
}
// straight byte comparison since we sort they must be identical
int end = offsets[termsAndFields.length];
byte[] left = this.termsBytes;
byte[] right = test.termsBytes;
for(int i=0;i < end;i++) {
if (left[i] != right[i]) {
return false;
}
}
return true;
}
return false;
}
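
This is the LUCENE-5502 fix noted in CHANGES.txt above: the rewritten equals compares the termsAndFields array, then the offsets array (the term boundaries, which the old version never checked), and only then the raw term bytes.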

View File

@ -17,19 +17,29 @@ package org.apache.lucene.queries;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@ -45,19 +55,13 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
public class CommonTermsQueryTest extends LuceneTestCase {
public void testBasics() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@ -186,7 +190,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {
public void testMinShouldMatch() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@ -344,7 +350,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {
@Test
public void testExtend() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@ -397,7 +405,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {
public void testRandomIndex() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
createRandomIndex(atLeast(50), w, random().nextLong());
DirectoryReader reader = w.getReader();
AtomicReader wrapper = SlowCompositeReaderWrapper.wrap(reader);

View File

@ -50,7 +50,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;
public class TermsFilterTest extends LuceneTestCase {
@ -297,7 +296,15 @@ public class TermsFilterTest extends LuceneTestCase {
}
}
}
public void testSingleFieldEquals() {
// Two terms with the same hash code
assertEquals("AaAaBB".hashCode(), "BBBBBB".hashCode());
TermsFilter left = termsFilter(true, new Term("id", "AaAaAa"), new Term("id", "AaAaBB"));
TermsFilter right = termsFilter(true, new Term("id", "AaAaAa"), new Term("id", "BBBBBB"));
assertFalse(left.equals(right));
}
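
The collision asserted above is a standard property of Java's polynomial string hash (h = 31*h + c): the two-char blocks "Aa" and "BB" both hash to 2112, so equal-length strings assembled from those blocks always collide. A quick sketch:

public class HashCollisionDemo {
  public static void main(String[] args) {
    // "Aa": 31*'A' + 'a' = 31*65 + 97 = 2112; "BB": 31*66 + 66 = 2112
    System.out.println("Aa".hashCode() == "BB".hashCode());         // true
    System.out.println("AaAaBB".hashCode() == "BBBBBB".hashCode()); // true
  }
}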
public void testNoTerms() {
List<Term> emptyTerms = Collections.emptyList();
List<BytesRef> emptyBytesRef = Collections.emptyList();

View File

@ -1379,7 +1379,9 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
// during flush/merge
public void testInvertedWrite() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
// Must be concurrent because thread(s) can be merging
// while up to one thread flushes, and each of those

View File

@ -0,0 +1,110 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Random;
import java.util.WeakHashMap;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.util.VirtualMethod;
/** A crazy {@link BulkScorer} that wraps a {@link Scorer}
* but shuffles the order of the collected documents. */
public class AssertingBulkOutOfOrderScorer extends BulkScorer {
final Random random;
final Scorer scorer;
public AssertingBulkOutOfOrderScorer(Random random, Scorer scorer) {
this.random = random;
this.scorer = scorer;
}
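// Fisher-Yates shuffle, applied in lock-step to the parallel doc/score/freq
// buffers so each buffered hit keeps its own score and freq after reordering.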
private void shuffle(int[] docIDs, float[] scores, int[] freqs, int size) {
for (int i = size - 1; i > 0; --i) {
final int other = random.nextInt(i + 1);
final int tmpDoc = docIDs[i];
docIDs[i] = docIDs[other];
docIDs[other] = tmpDoc;
final float tmpScore = scores[i];
scores[i] = scores[other];
scores[other] = tmpScore;
final int tmpFreq = freqs[i];
freqs[i] = freqs[other];
freqs[other] = tmpFreq;
}
}
private static void flush(int[] docIDs, float[] scores, int[] freqs, int size,
FakeScorer scorer, Collector collector) throws IOException {
for (int i = 0; i < size; ++i) {
scorer.doc = docIDs[i];
scorer.freq = freqs[i];
scorer.score = scores[i];
collector.collect(scorer.doc);
}
}
@Override
public boolean score(Collector collector, int max) throws IOException {
if (scorer.docID() == -1) {
scorer.nextDoc();
}
FakeScorer fake = new FakeScorer();
collector.setScorer(fake);
final int bufferSize = 1 + random.nextInt(100);
final int[] docIDs = new int[bufferSize];
final float[] scores = new float[bufferSize];
final int[] freqs = new int[bufferSize];
int buffered = 0;
int doc = scorer.docID();
while (doc < max) {
docIDs[buffered] = doc;
scores[buffered] = scorer.score();
freqs[buffered] = scorer.freq();
if (++buffered == bufferSize) {
shuffle(docIDs, scores, freqs, buffered);
flush(docIDs, scores, freqs, buffered, fake, collector);
buffered = 0;
}
doc = scorer.nextDoc();
}
shuffle(docIDs, scores, freqs, buffered);
flush(docIDs, scores, freqs, buffered, fake, collector);
return doc != Scorer.NO_MORE_DOCS;
}
@Override
public String toString() {
return "AssertingBulkOutOfOrderScorer(" + scorer + ")";
}
}

View File

@ -34,18 +34,11 @@ public class AssertingBulkScorer extends BulkScorer {
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR = new VirtualMethod<BulkScorer>(BulkScorer.class, "score", Collector.class);
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR_RANGE = new VirtualMethod<BulkScorer>(BulkScorer.class, "score", Collector.class, int.class);
// we need to track scorers using a weak hash map because otherwise we
// could lose references because of eg.
// AssertingScorer.score(Collector) which needs to delegate to work correctly
private static Map<BulkScorer, WeakReference<AssertingBulkScorer>> ASSERTING_INSTANCES = Collections.synchronizedMap(new WeakHashMap<BulkScorer, WeakReference<AssertingBulkScorer>>());
public static BulkScorer wrap(Random random, BulkScorer other) {
if (other == null || other instanceof AssertingBulkScorer) {
return other;
}
final AssertingBulkScorer assertScorer = new AssertingBulkScorer(random, other);
ASSERTING_INSTANCES.put(other, new WeakReference<AssertingBulkScorer>(assertScorer));
return assertScorer;
return new AssertingBulkScorer(random, other);
}
public static boolean shouldWrap(BulkScorer inScorer) {
@ -87,4 +80,5 @@ public class AssertingBulkScorer extends BulkScorer {
public String toString() {
return "AssertingBulkScorer(" + in + ")";
}
}

View File

@ -29,12 +29,14 @@ class AssertingWeight extends Weight {
return other instanceof AssertingWeight ? other : new AssertingWeight(random, other);
}
final boolean scoresDocsOutOfOrder;
final Random random;
final Weight in;
AssertingWeight(Random random, Weight in) {
this.random = random;
this.in = in;
scoresDocsOutOfOrder = in.scoresDocsOutOfOrder() || random.nextBoolean();
}
@Override
@ -73,8 +75,21 @@ class AssertingWeight extends Weight {
if (inScorer == null) {
return null;
}
if (AssertingBulkScorer.shouldWrap(inScorer)) {
// The incoming scorer already has a specialized
// implementation for BulkScorer, so we should use it:
return AssertingBulkScorer.wrap(new Random(random.nextLong()), inScorer);
} else if (scoreDocsInOrder == false && random.nextBoolean()) {
// The caller claims it can handle out-of-order
// docs; let's confirm that by pulling docs and
// randomly shuffling them before collection:
//Scorer scorer = in.scorer(context, acceptDocs);
Scorer scorer = scorer(context, acceptDocs);
// Scorer should not be null if bulkScorer wasn't:
assert scorer != null;
return new AssertingBulkOutOfOrderScorer(new Random(random.nextLong()), scorer);
} else {
// Let super wrap this.scorer instead, so we use
// AssertingScorer:
@ -84,8 +99,7 @@ class AssertingWeight extends Weight {
@Override
public boolean scoresDocsOutOfOrder() {
return in.scoresDocsOutOfOrder();
return scoresDocsOutOfOrder;
}
}

View File

@ -329,7 +329,7 @@ public class QueryUtils {
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
return false;
}
});

View File

@ -449,7 +449,9 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
myNodeID = nodeID;
dir = newFSDirectory(TestUtil.getTempDir("ShardSearchingTestBase"));
// TODO: set warmer
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
if (VERBOSE) {
iwc.setInfoStream(new PrintStreamInfoStream(System.out));

View File

@ -138,6 +138,12 @@ Bug Fixes
* SOLR-5818: Distributed search with a custom comparator does not work correctly.
(Ryan Ernst)
* SOLR-5834: Overseer threads are only being interrupted and not closed.
(hossman, Mark Miller)
* SOLR-5839: ZookeeperInfoServlet does not trim path properly.
(Furkan KAMACI via Mark Miller)
Optimizations
----------------------
* SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY
@ -181,6 +187,13 @@ Other Changes
* SOLR-5796: Make configurable how long we are willing to wait for a core to see
the ZK-advertised leader in its local state.
(Timothy Potter via Mark Miller)
* SOLR-5825: Separate HTTP request creation from execution in SolrJ.
(Steven Bower via Erick Erickson)
* SOLR-5837: Add hashCode/equals to SolrDocument, SolrInputDocument
and SolrInputField for testing purposes. (Varun Thacker, Noble Paul,
Mark Miller)
================== 4.7.0 ==================

View File

@ -81,8 +81,8 @@ public class Overseer {
// Internal queue where the Overseer stores events that have not yet been published into cluster state.
// If the Overseer dies while extracting the main queue, a new Overseer will start from this queue.
private final DistributedQueue workQueue;
private volatile boolean isClosed;
private Map clusterProps;
private boolean isClosed = false;
public ClusterStateUpdater(final ZkStateReader reader, final String myId) {
this.zkClient = reader.getZkClient();
@ -1030,20 +1030,22 @@ public class Overseer {
class OverseerThread extends Thread implements ClosableThread {
private volatile boolean isClosed;
protected volatile boolean isClosed;
private ClosableThread thread;
public OverseerThread(ThreadGroup tg,
ClusterStateUpdater clusterStateUpdater) {
super(tg, clusterStateUpdater);
public OverseerThread(ThreadGroup tg, ClosableThread thread) {
super(tg, (Runnable) thread);
this.thread = thread;
}
public OverseerThread(ThreadGroup ccTg,
OverseerCollectionProcessor overseerCollectionProcessor, String string) {
super(ccTg, overseerCollectionProcessor, string);
public OverseerThread(ThreadGroup ccTg, ClosableThread thread, String name) {
super(ccTg, (Runnable) thread, name);
this.thread = thread;
}
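// SOLR-5834: close() must delegate to the wrapped ClosableThread so its run loop
// actually terminates, rather than the thread merely being interrupted.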
@Override
public void close() {
thread.close();
this.isClosed = true;
}
@ -1084,8 +1086,7 @@ public class Overseer {
ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process.");
ocp = new OverseerCollectionProcessor(reader, id, shardHandler, adminPath);
ccThread = new OverseerThread(ccTg, ocp,
"Overseer-" + id);
ccThread = new OverseerThread(ccTg, ocp, "Overseer-" + id);
ccThread.setDaemon(true);
updaterThread.start();

View File

@ -195,7 +195,7 @@ public final class ZookeeperInfoServlet extends HttpServlet {
if (path == null) {
path = "/";
} else {
path.trim();
path = path.trim();
if (path.length() == 0) {
path = "/";
}

View File

@ -23,7 +23,9 @@ import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
@ -394,4 +396,106 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
assertNull(h.validateUpdate(add(xml, new String[0])));
}
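// SOLR-5837: sanity tests for the equals/hashCode implementations added to
// SolrDocument, SolrInputDocument and SolrInputField.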
public void testSolrDocumentEquals() {
String randomString = TestUtil.randomSimpleString(random());
SolrDocument doc1 = new SolrDocument();
doc1.addField("foo", randomString);
SolrDocument doc2 = new SolrDocument();
doc2.addField("foo", randomString);
assertTrue(doc1.equals(doc2));
doc1.addField("foo", "bar");
assertFalse(doc1.equals(doc2));
doc1 = new SolrDocument();
doc1.addField("bar", randomString);
assertFalse(doc1.equals(doc2));
int randomInt = random().nextInt();
doc1 = new SolrDocument();
doc1.addField("foo", randomInt);
doc2 = new SolrDocument();
doc2.addField("foo", randomInt);
assertTrue(doc1.equals(doc2));
doc2 = new SolrDocument();
doc2.addField("bar", randomInt);
assertFalse(doc1.equals(doc2));
}
public void testSolrInputDocumentEquality() {
String randomString = TestUtil.randomSimpleString(random());
SolrInputDocument doc1 = new SolrInputDocument();
doc1.addField("foo", randomString);
SolrInputDocument doc2 = new SolrInputDocument();
doc2.addField("foo", randomString);
assertTrue(doc1.equals(doc2));
doc1.setDocumentBoost(1.1f);
assertFalse(doc1.equals(doc2));
doc2.setDocumentBoost(1.1f);
assertTrue(doc1.equals(doc2));
doc2.setDocumentBoost(20f);
assertFalse(doc1.equals(doc2));
doc1 = new SolrInputDocument();
doc1.addField("foo", randomString);
doc2 = new SolrInputDocument();
doc2.addField("foo", randomString);
SolrInputDocument childDoc = new SolrInputDocument();
childDoc.addField("foo", "bar");
doc1.addChildDocument(childDoc);
assertFalse(doc1.equals(doc2));
doc2.addChildDocument(childDoc);
assertTrue(doc1.equals(doc2));
SolrInputDocument childDoc1 = new SolrInputDocument();
childDoc1.addField(TestUtil.randomSimpleString(random()), TestUtil.randomSimpleString(random()));
doc2.addChildDocument(childDoc1);
assertFalse(doc1.equals(doc2));
}
public void testSolrInputFieldEquality() {
String randomString = TestUtil.randomSimpleString(random(), 10, 20);
int val = random().nextInt();
SolrInputField sif1 = new SolrInputField(randomString);
sif1.setValue(val, 1.0f);
SolrInputField sif2 = new SolrInputField(randomString);
sif2.setValue(val, 1.0f);
assertTrue(sif1.equals(sif2));
sif1.setBoost(2.1f);
sif2.setBoost(2.1f);
assertTrue(sif1.equals(sif2));
sif2.setBoost(2.0f);
assertFalse(sif1.equals(sif2));
sif2.setName("foo");
assertFalse(sif1.equals(sif2));
}
}

View File

@ -199,8 +199,11 @@ public class HttpSolrServer extends SolrServer {
return request(request, responseParser);
}
public NamedList<Object> request(final SolrRequest request,
final ResponseParser processor) throws SolrServerException, IOException {
public NamedList<Object> request(final SolrRequest request, final ResponseParser processor) throws SolrServerException, IOException {
return executeMethod(createMethod(request), processor);
}
protected HttpRequestBase createMethod(final SolrRequest request) throws IOException, SolrServerException {
HttpRequestBase method = null;
InputStream is = null;
SolrParams params = request.getParams();
@ -382,6 +385,10 @@ public class HttpSolrServer extends SolrServer {
throw new SolrServerException("error reading streams", ex);
}
return method;
}
protected NamedList<Object> executeMethod(HttpRequestBase method, final ResponseParser processor) throws SolrServerException {
// XXX client already has this set, is this needed?
method.getParams().setParameter(ClientPNames.HANDLE_REDIRECTS,
followRedirects);
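A minimal sketch (not from this commit) of what the createMethod/executeMethod split enables: a subclass can build the request, mutate it, and only then execute it. The subclass name and the X-Trace-Id header are illustrative assumptions:

  public class TracingSolrServer extends HttpSolrServer {
    public TracingSolrServer(String baseURL) {
      super(baseURL);
    }
    @Override
    public NamedList<Object> request(final SolrRequest request, final ResponseParser processor)
        throws SolrServerException, IOException {
      HttpRequestBase method = createMethod(request); // build, but do not send yet
      method.addHeader("X-Trace-Id", java.util.UUID.randomUUID().toString()); // hypothetical header
      return executeMethod(method, processor); // now execute
    }
  }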

View File

@ -213,7 +213,41 @@ public class SolrDocument implements Map<String,Object>, Iterable<Map.Entry<Stri
public Iterator<Entry<String, Object>> iterator() {
return _fields.entrySet().iterator();
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof SolrDocument)) {
return false;
}
SolrDocument solrDocument = (SolrDocument) o;
if (!_fields.equals(solrDocument._fields)) {
return false;
}
return true;
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public int hashCode() {
return _fields.hashCode();
}
//-----------------------------------------------------------------------------------------
// JSTL Helpers
//-----------------------------------------------------------------------------------------

View File

@ -275,7 +275,51 @@ public class SolrInputDocument implements Map<String,SolrInputField>, Iterable<S
public Collection<SolrInputField> values() {
return _fields.values();
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof SolrInputDocument)) {
return false;
}
SolrInputDocument sdoc = (SolrInputDocument) o;
if (!_fields.equals(sdoc._fields)) {
return false;
}
if (Float.compare(sdoc._documentBoost, _documentBoost) != 0) {
return false;
}
if (_childDocuments != null ? !_childDocuments.equals(sdoc._childDocuments) : sdoc._childDocuments != null) {
return false;
}
return true;
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public int hashCode() {
int result = _fields.hashCode();
result = 31 * result + (_documentBoost != +0.0f ? Float.floatToIntBits(_documentBoost) : 0);
result = 31 * result + (_childDocuments != null ? _childDocuments.hashCode() : 0);
return result;
}
public void addChildDocument(SolrInputDocument child) {
if (_childDocuments == null) {
_childDocuments = new ArrayList<SolrInputDocument>();

View File

@ -229,4 +229,50 @@ public class SolrInputField implements Iterable<Object>, Serializable
}
return clone;
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof SolrInputField)) {
return false;
}
SolrInputField sif = (SolrInputField) o;
if (!name.equals(sif.name)) {
return false;
}
if (!value.equals(sif.value)) {
return false;
}
if (Float.compare(sif.boost, boost) != 0) {
return false;
}
return true;
}
/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public int hashCode() {
int result = name.hashCode();
result = 31 * result + value.hashCode();
result = 31 * result + (boost != +0.0f ? Float.floatToIntBits(boost) : 0);
return result;
}
}

View File

@ -521,7 +521,7 @@ public class JavaBinCodec {
public Object getValue() {
return value;
}
@Override
public String toString() {
return "MapEntry[" + key.toString() + ":" + value.toString() + "]";
@ -530,7 +530,28 @@ public class JavaBinCodec {
@Override
public Object setValue(Object value) {
throw new UnsupportedOperationException();
}};
}
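// Note: accumulates as result *= (31 + componentHash), which is unconventional
// but still guarantees that equal entries hash equally, as the contract requires.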
@Override
public int hashCode() {
int result = 31;
result *= 31 + getKey().hashCode();
result *= 31 + getValue().hashCode();
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (!(obj instanceof Entry)) {
return false;
}
Map.Entry<Object, Object> entry = (Entry<Object, Object>) obj;
return (this.getKey().equals(entry.getKey()) && this.getValue().equals(entry.getValue()));
}
};
}
/**

Binary file not shown.

View File

@ -17,17 +17,37 @@ package org.apache.solr.common.util;
* limitations under the License.
*/
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.common.EnumFieldValue;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.junit.Test;
public class TestJavaBinCodec extends LuceneTestCase {
private static final String SOLRJ_JAVABIN_BACKCOMPAT_BIN = "/solrj/javabin_backcompat.bin";
private final String BIN_FILE_LOCATION = "./solr/solrj/src/test-files/solrj/javabin_backcompat.bin";
public void testStrings() throws Exception {
JavaBinCodec javabin = new JavaBinCodec();
for (int i = 0; i < 10000*RANDOM_MULTIPLIER; i++) {
for (int i = 0; i < 10000 * RANDOM_MULTIPLIER; i++) {
String s = TestUtil.randomUnicodeString(random());
ByteArrayOutputStream os = new ByteArrayOutputStream();
javabin.marshal(s, os);
@ -36,4 +56,158 @@ public class TestJavaBinCodec extends LuceneTestCase {
assertEquals(s, o);
}
}
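// Builds one instance of (nearly) every type JavaBinCodec can serialize; serves
// as the corpus for the back-compat and forward-compat tests below.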
private List<Object> generateAllDataTypes() {
List<Object> types = new ArrayList<>();
types.add(null); //NULL
types.add(true);
types.add(false);
types.add((byte) 1);
types.add((short) 2);
types.add((double) 3);
types.add(-4);
types.add(4);
types.add(42);
types.add((long) -5);
types.add((long) 5);
types.add((long) 50);
types.add((float) 6);
types.add(new Date(0));
Map<Integer, Integer> map = new HashMap<>();
map.put(1, 2);
types.add(map);
SolrDocument doc = new SolrDocument();
doc.addField("foo", "bar");
types.add(doc);
SolrDocumentList solrDocs = new SolrDocumentList();
solrDocs.setMaxScore(1.0f);
solrDocs.setNumFound(1);
solrDocs.setStart(0);
solrDocs.add(0, doc);
types.add(solrDocs);
types.add(new byte[] {1,2,3,4,5});
// TODO?
// List<String> list = new ArrayList<String>();
// list.add("one");
// types.add(list.iterator());
types.add((byte) 15); //END
SolrInputDocument idoc = new SolrInputDocument();
idoc.addField("foo", "bar");
types.add(idoc);
SolrInputDocument parentDoc = new SolrInputDocument();
parentDoc.addField("foo", "bar");
SolrInputDocument childDoc = new SolrInputDocument();
childDoc.addField("foo", "bar");
parentDoc.addChildDocument(childDoc);
types.add(parentDoc);
types.add(new EnumFieldValue(1, "foo"));
types.add(map.entrySet().iterator().next()); //Map.Entry
types.add((byte) (1 << 5)); //TAG_AND_LEN
types.add("foo");
types.add(1);
types.add((long) 2);
SimpleOrderedMap simpleOrderedMap = new SimpleOrderedMap();
simpleOrderedMap.add("bar", "barbar");
types.add(simpleOrderedMap);
NamedList<String> nl = new NamedList<>();
nl.add("foo", "barbar");
types.add(nl);
return types;
}
@Test
public void testBackCompat() {
JavaBinCodec javabin = new JavaBinCodec(){
@Override
public List<Object> readIterator(DataInputInputStream fis) throws IOException {
return super.readIterator(fis);
}
};
try {
InputStream is = getClass().getResourceAsStream(SOLRJ_JAVABIN_BACKCOMPAT_BIN);
List<Object> unmarshaledObj = (List<Object>) javabin.unmarshal(is);
List<Object> matchObj = generateAllDataTypes();
assertEquals(unmarshaledObj.size(), matchObj.size());
for (int i = 0; i < unmarshaledObj.size(); i++) {
if (unmarshaledObj.get(i) instanceof byte[] && matchObj.get(i) instanceof byte[]) {
byte[] b1 = (byte[]) unmarshaledObj.get(i);
byte[] b2 = (byte[]) matchObj.get(i);
assertTrue(Arrays.equals(b1, b2));
} else {
assertEquals(unmarshaledObj.get(i), matchObj.get(i));
}
}
} catch (IOException e) {
fail(e.getMessage());
}
}
@Test
public void testForwardCompat() {
JavaBinCodec javabin = new JavaBinCodec();
ByteArrayOutputStream os = new ByteArrayOutputStream();
Object data = generateAllDataTypes();
try {
javabin.marshal(data, os);
byte[] newFormatBytes = os.toByteArray();
InputStream is = getClass().getResourceAsStream(SOLRJ_JAVABIN_BACKCOMPAT_BIN);
byte[] currentFormatBytes = IOUtils.toByteArray(is);
for (int i = 1; i < currentFormatBytes.length; i++) { // ignore the first byte; it is version information
assertEquals(currentFormatBytes[i], newFormatBytes[i]);
}
} catch (IOException e) {
e.printStackTrace();
fail(e.getMessage());
}
}
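// Regenerates the javabin_backcompat.bin reference file; run manually via main()
// below whenever the serialization format changes.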
public void genBinaryFile() throws IOException {
JavaBinCodec javabin = new JavaBinCodec();
ByteArrayOutputStream os = new ByteArrayOutputStream();
Object data = generateAllDataTypes();
javabin.marshal(data, os);
byte[] out = os.toByteArray();
FileOutputStream fs = new FileOutputStream(new File(BIN_FILE_LOCATION));
BufferedOutputStream bos = new BufferedOutputStream(fs);
bos.write(out);
bos.close();
}
public static void main(String[] args) throws IOException {
TestJavaBinCodec test = new TestJavaBinCodec();
test.genBinaryFile();
}
}