mirror of https://github.com/apache/lucene.git
LUCENE-5487: merge trunk
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5487@1576473 13f79535-47bb-0310-9956-ffa450edef68
commit 2d88332e9d
@@ -18,6 +18,12 @@
-->

<project name="lucene-solr" default="-projecthelp" basedir=".">
<!-- Look for property definition in various *build.properties files -->
<property file="${user.home}/lucene.build.properties"/>
<property file="${user.home}/build.properties"/>
<property file="${basedir}/build.properties"/>
<property file="lucene/build.properties"/><!-- hack for Lucene users, clones Lucene's common-build.xml -->

<target name="-projecthelp">
<java fork="false" classname="org.apache.tools.ant.Main" taskname="-">
<arg value="-projecthelp"/>
@@ -268,10 +274,6 @@
</target>

<target name="idea" depends="resolve" description="Setup IntelliJ IDEA configuration">
<!-- Look for property definition for ${idea.jdk} in various *build.properties files -->
<property file="lucene/build.properties"/> <!-- Look in the current project first -->
<property file="${user.home}/lucene.build.properties"/>
<property file="${user.home}/build.properties"/>
<condition property="idea.jdk.is.set">
<isset property="idea.jdk"/>
</condition>
@@ -92,7 +92,7 @@
<svn-checker failonmodifications="true"/>
</target>

<property name="svnkit.version" value="1.7.8"/>
<property name="svnkit.version" value="1.8.4"/>

<macrodef xmlns:ivy="antlib:org.apache.ivy.ant" name="svn-checker">
<attribute name="failonmodifications" default="true"/> <!-- false if file modifications are allowed -->
@@ -107,8 +107,6 @@
import org.tmatesoft.svn.core.wc.*;
import org.apache.tools.ant.Project;

def RECOMMENDED_SVNKIT_18 = '1.8.2';

SVNClientManager manager = SVNClientManager.newInstance();
SVNStatusClient statusClient = manager.getStatusClient();
SVNWCClient wcClient = manager.getWCClient();
@@ -124,11 +122,7 @@
def ec = ex.getErrorMessage().getErrorCode();
int code = ec.getCode();
int category = ec.getCategory();
if (code == SVNErrorCode.WC_UNSUPPORTED_FORMAT.getCode()) {
task.log('WARNING: Unsupported SVN working copy version! Disabling checks...', Project.MSG_WARN);
task.log('If your working copy is on version 1.8 already, please pass -Dsvnkit.version=' + RECOMMENDED_SVNKIT_18 + ' to successfully run checks.', Project.MSG_INFO);
return;
} else if (code == SVNErrorCode.WC_NOT_DIRECTORY.getCode() || code == SVNErrorCode.WC_NOT_FILE.getCode()) {
if (code == SVNErrorCode.WC_NOT_DIRECTORY.getCode() || code == SVNErrorCode.WC_NOT_FILE.getCode()) {
task.log('WARNING: Development directory is not an SVN checkout! Disabling checks...', Project.MSG_WARN);
return;
} else if (category == SVNErrorCode.WC_CATEGORY) {
@@ -147,15 +147,33 @@ Bug fixes
recursive affix application are driven correctly by continuation classes in the affix file.
(Robert Muir)

* LUCENE-5497: HunspellStemFilter properly handles escaped terms and affixes without conditions.
(Robert Muir)

* LUCENE-5505: HunspellStemFilter ignores BOM markers in dictionaries and handles varying
types of whitespace in SET/FLAG commands. (Robert Muir)

* LUCENE-5507: Fix HunspellStemFilter loading of dictionaries with large amounts of aliases
etc before the encoding declaration. (Robert Muir)

* LUCENE-5502: Fixed TermsFilter.equals that could return true for different
filters. (Igor Motov via Adrien Grand)

Test Framework

* LUCENE-5449: Rename _TestUtil and _TestHelper to remove the leading _.

* LUCENE-5501: Added random out-of-order collection testing (when the collector
supports it) to AssertingIndexSearcher. (Adrien Grand)

Build

* LUCENE-5463: RamUsageEstimator.(human)sizeOf(Object) is now a forbidden API.
(Adrien Grand, Robert Muir)

* LUCENE-5511: "ant precommit" / "ant check-svn-working-copy" now work again
with any working copy format (thanks to svnkit 1.8.4). (Uwe Schindler)

======================= Lucene 4.7.0 =======================

New Features
@@ -188,7 +206,7 @@ New Features
AnalyzingInfixSuggester but boosts suggestions that matched tokens
with lower positions. (Remi Melisson via Mike McCandless)

* LUCENE-4399: When sorting by String (SortField.STRING), you can now
* LUCENE-5399: When sorting by String (SortField.STRING), you can now
specify whether missing values should be sorted first (the default),
using SortField.setMissingValue(SortField.STRING_FIRST), or last,
using SortField.setMissingValue(SortField.STRING_LAST). (Rob Muir,
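The LUCENE-5399 entry above describes the new missing-value control for string sorts. A minimal sketch of how the API it names would be used; the field name "title" and the caller-supplied searcher are illustrative assumptions, not part of this commit:

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;

// Sort by a string field, asking for documents that have no value to come last.
static TopDocs searchTitlesMissingLast(IndexSearcher searcher) throws IOException {
  SortField titleSort = new SortField("title", SortField.Type.STRING);
  titleSort.setMissingValue(SortField.STRING_LAST); // STRING_FIRST is the default
  return searcher.search(new MatchAllDocsQuery(), 10, new Sort(titleSort));
}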
@@ -35,12 +35,16 @@ import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
@@ -54,6 +58,7 @@ import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
@@ -154,21 +159,41 @@ public class Dictionary {
this.ignoreCase = ignoreCase;
this.needsInputCleaning = ignoreCase;
this.needsOutputCleaning = false; // set if we have an OCONV
// TODO: we really need to probably buffer this on disk since so many newer dictionaries
// (en_GB, hu_HU, etc) now have tons of AM lines (morph metadata) etc before they finally declare
// their encoding... but for now this large buffer is a workaround
BufferedInputStream buffered = new BufferedInputStream(affix, 65536);
buffered.mark(65536);
String encoding = getDictionaryEncoding(buffered);
buffered.reset();
CharsetDecoder decoder = getJavaEncoding(encoding);
readAffixFile(buffered, decoder);
flagLookup.add(new BytesRef()); // no flags -> ord 0
stripLookup.add(new BytesRef()); // no strip -> ord 0
IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
readDictionaryFiles(dictionaries, decoder, b);
words = b.finish();

File aff = File.createTempFile("affix", "aff", tempDir);
OutputStream out = new BufferedOutputStream(new FileOutputStream(aff));
InputStream aff1 = null;
InputStream aff2 = null;
try {
// copy contents of affix stream to temp file
final byte [] buffer = new byte [1024 * 8];
int len;
while ((len = affix.read(buffer)) > 0) {
out.write(buffer, 0, len);
}
out.close();

// pass 1: get encoding
aff1 = new BufferedInputStream(new FileInputStream(aff));
String encoding = getDictionaryEncoding(aff1);

// pass 2: parse affixes
CharsetDecoder decoder = getJavaEncoding(encoding);
aff2 = new BufferedInputStream(new FileInputStream(aff));
readAffixFile(aff2, decoder);

// read dictionary entries
IntSequenceOutputs o = IntSequenceOutputs.getSingleton();
Builder<IntsRef> b = new Builder<IntsRef>(FST.INPUT_TYPE.BYTE4, o);
readDictionaryFiles(dictionaries, decoder, b);
words = b.finish();
aliases = null; // no longer needed
} finally {
IOUtils.closeWhileHandlingException(out, aff1, aff2);
aff.delete();
}
}

/**
@@ -251,6 +276,10 @@ public class Dictionary {
LineNumberReader reader = new LineNumberReader(new InputStreamReader(affixStream, decoder));
String line = null;
while ((line = reader.readLine()) != null) {
// ignore any BOM marker on first line
if (reader.getLineNumber() == 1 && line.startsWith("\uFEFF")) {
line = line.substring(1);
}
if (line.startsWith(ALIAS_KEY)) {
parseAlias(line);
} else if (line.startsWith(PREFIX_KEY)) {
@@ -348,8 +377,10 @@ public class Dictionary {
String line = reader.readLine();
String ruleArgs[] = line.split("\\s+");

if (ruleArgs.length < 5) {
throw new ParseException("The affix file contains a rule with less than five elements", reader.getLineNumber());
// from the manpage: PFX flag stripping prefix [condition [morphological_fields...]]
// condition is optional
if (ruleArgs.length < 4) {
throw new ParseException("The affix file contains a rule with less than four elements: " + line, reader.getLineNumber());
}

char flag = flagParsingStrategy.parseFlag(ruleArgs[1]);
@@ -370,7 +401,7 @@ public class Dictionary {
Arrays.sort(appendFlags);
}

String condition = ruleArgs[4];
String condition = ruleArgs.length > 4 ? ruleArgs[4] : ".";
// at least the gascon affix file has this issue
if (condition.startsWith("[") && !condition.endsWith("]")) {
condition = condition + "]";
@@ -464,6 +495,9 @@ public class Dictionary {

return builder.finish();
}

/** pattern accepts optional BOM + SET + any whitespace */
final static Pattern ENCODING_PATTERN = Pattern.compile("^(\u00EF\u00BB\u00BF)?SET\\s+");

/**
* Parses the encoding specified in the affix file readable through the provided InputStream
@@ -473,7 +507,7 @@ public class Dictionary {
* @throws IOException Can be thrown while reading from the InputStream
* @throws ParseException Thrown if the first non-empty non-comment line read from the file does not adhere to the format {@code SET <encoding>}
*/
private String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
static String getDictionaryEncoding(InputStream affix) throws IOException, ParseException {
final StringBuilder encoding = new StringBuilder();
for (;;) {
encoding.setLength(0);
@@ -496,9 +530,10 @@ public class Dictionary {
}
continue;
}
if (encoding.length() > 4 && "SET ".equals(encoding.substring(0, 4))) {
// cleanup the encoding string, too (whitespace)
return encoding.substring(4).trim();
Matcher matcher = ENCODING_PATTERN.matcher(encoding);
if (matcher.find()) {
int last = matcher.end();
return encoding.substring(last).trim();
}
}
}
@@ -536,8 +571,12 @@ public class Dictionary {
* @param flagLine Line containing the flag information
* @return FlagParsingStrategy that handles parsing flags in the way specified in the FLAG definition
*/
private FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
String flagType = flagLine.substring(5);
static FlagParsingStrategy getFlagParsingStrategy(String flagLine) {
String parts[] = flagLine.split("\\s+");
if (parts.length != 2) {
throw new IllegalArgumentException("Illegal FLAG specification: " + flagLine);
}
String flagType = parts[1];

if (NUM_FLAG_TYPE.equals(flagType)) {
return new NumFlagParsingStrategy();
@@ -550,6 +589,24 @@ public class Dictionary {
throw new IllegalArgumentException("Unknown flag type: " + flagType);
}

final char FLAG_SEPARATOR = 0x1f; // flag separator after escaping

String unescapeEntry(String entry) {
StringBuilder sb = new StringBuilder();
for (int i = 0; i < entry.length(); i++) {
char ch = entry.charAt(i);
if (ch == '\\' && i+1 < entry.length()) {
sb.append(entry.charAt(i+1));
i++;
} else if (ch == '/') {
sb.append(FLAG_SEPARATOR);
} else {
sb.append(ch);
}
}
return sb.toString();
}

/**
* Reads the dictionary file through the provided InputStreams, building up the words map
*
@@ -570,8 +627,9 @@ public class Dictionary {
String line = lines.readLine(); // first line is number of entries (approximately, sometimes)

while ((line = lines.readLine()) != null) {
line = unescapeEntry(line);
if (needsInputCleaning) {
int flagSep = line.lastIndexOf('/');
int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
if (flagSep == -1) {
CharSequence cleansed = cleanInput(line, sb);
writer.write(cleansed.toString().getBytes(IOUtils.CHARSET_UTF_8));
@@ -604,7 +662,7 @@ public class Dictionary {
scratch1.length = o1.length;

for (int i = scratch1.length - 1; i >= 0; i--) {
if (scratch1.bytes[scratch1.offset + i] == '/') {
if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR) {
scratch1.length = i;
break;
}
@@ -615,7 +673,7 @@ public class Dictionary {
scratch2.length = o2.length;

for (int i = scratch2.length - 1; i >= 0; i--) {
if (scratch2.bytes[scratch2.offset + i] == '/') {
if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR) {
scratch2.length = i;
break;
}
@@ -648,7 +706,7 @@ public class Dictionary {
String entry;
char wordForm[];

int flagSep = line.lastIndexOf('/');
int flagSep = line.lastIndexOf(FLAG_SEPARATOR);
if (flagSep == -1) {
wordForm = NOFLAGS;
entry = line;
@@ -738,7 +796,9 @@ public class Dictionary {
final int count = Integer.parseInt(ruleArgs[1]);
aliases = new String[count];
} else {
aliases[aliasCount++] = ruleArgs[1];
// an alias can map to no flags
String aliasValue = ruleArgs.length == 1 ? "" : ruleArgs[1];
aliases[aliasCount++] = aliasValue;
}
}

@@ -753,7 +813,7 @@ public class Dictionary {
/**
* Abstraction of the process of parsing flags taken from the affix and dic files
*/
private static abstract class FlagParsingStrategy {
static abstract class FlagParsingStrategy {

/**
* Parses the given String into a single flag
@@ -828,6 +888,9 @@ public class Dictionary {
}

StringBuilder builder = new StringBuilder();
if (rawFlags.length() % 2 == 1) {
throw new IllegalArgumentException("Invalid flags (should be even number of characters): " + rawFlags);
}
for (int i = 0; i < rawFlags.length(); i+=2) {
char cookedFlag = (char) ((int) rawFlags.charAt(i) + (int) rawFlags.charAt(i + 1));
builder.append(cookedFlag);
@@ -55,7 +55,7 @@ abstract class StemmerTestBase extends LuceneTestCase {
}

try {
Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), true);
Dictionary dictionary = new Dictionary(affixStream, Arrays.asList(dictStreams), ignoreCase);
stemmer = new Stemmer(dictionary);
} finally {
IOUtils.closeWhileHandlingException(affixStream);
@@ -47,8 +47,8 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"afrikaans_spell_checker-20110323-fx+tb+fn+sm.xpi", "dictionaries/af-ZA.dic", "dictionaries/af-ZA.aff",
"albanisches_worterbuch-1.6.9-fx+tb+sm+fn.xpi", "dictionaries/sq.dic", "dictionaries/sq.aff",
"amharic_spell_checker-0.4-fx+fn+tb+sm.xpi", "dictionaries/am_ET.dic", "dictionaries/am_ET.aff",
//BUG! "arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
//BUG! "armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
"arabic_spell_checking_dictionary-3.2.20120321-fx+tb.xpi", "dictionaries/ar.dic", "dictionaries/ar.aff",
"armenian_spell_checker_dictionary-0.32-fx+tb+sm.xpi", "dictionaries/hy_AM.dic", "dictionaries/hy_AM.aff",
"azerbaijani_spell_checker-0.3-fx+tb+fn+sm+sb.xpi", "dictionaries/az-Latn-AZ.dic", "dictionaries/az-Latn-AZ.aff",
"belarusian_classic_dictionary-0.1.2-tb+fx+sm.xpi", "dictionaries/be-classic.dic", "dictionaries/be-classic.aff",
"belarusian_dictionary-0.1.2-fx+sm+tb.xpi", "dictionaries/be.dic", "dictionaries/be.aff",
@@ -72,13 +72,13 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"diccionario_espanol_argentina-2.5.1-tb+fx+sm.xpi", "dictionaries/es_AR.dic", "dictionaries/es_AR.aff",
"diccionario_espanol_mexico-1.1.3-fn+tb+fx+sm.xpi", "dictionaries/es_MX.dic", "dictionaries/es_MX.aff",
"diccionario_ortografico_valenciano-2.2.0-fx+tb+fn+sm.xpi", "dictionaries/roa-ES-val.dic", "dictionaries/roa-ES-val.aff",
//BUG! "diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
"diccionario_papiamentoaruba-0.2-fn+sm+tb+fx.xpi", "dictionaries/Papiamento.dic", "dictionaries/Papiamento.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic-reform.dic", "dictionaries/fr-classic-reform.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-classic.dic", "dictionaries/fr-classic.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-modern.dic", "dictionaries/fr-modern.aff",
"dictionnaires_francais-5.0.2-fx+tb+sm.xpi", "dictionaries/fr-reform.dic", "dictionaries/fr-reform.aff",
"difazier_an_drouizig-0.12-tb+sm+fx.xpi", "dictionaries/br.dic", "dictionaries/br.aff",
//BUG! "dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
"dikshonario_papiamentuantia_hulandes-0.5-fx+tb+fn+sb+sm.xpi", "dictionaries/Papiamentu.dic", "dictionaries/Papiamentu.aff",
"dizionari_furlan-3.1-tb+fx+sm.xpi", "dictionaries/fur-IT.dic", "dictionaries/fur-IT.aff",
"dizionario_italiano-3.3.2-fx+sm+tb.xpi", "dictionaries/it_IT.dic", "dictionaries/it_IT.aff",
"eesti_keele_speller-3.2-fx+tb+sm.xpi", "dictionaries/et-EE.dic", "dictionaries/et-EE.aff",
@@ -101,10 +101,10 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"hausa_spelling_dictionary-0.2-tb+fx.xpi", "dictionaries/ha-GH.dic", "dictionaries/ha-GH.aff",
"hebrew_spell_checking_dictionary_from_hspell-1.2.0.1-fx+sm+tb.xpi", "dictionaries/he.dic", "dictionaries/he.aff",
"hindi_spell_checker-0.4-fx+tb+sm+sb+fn.xpi", "dictionaries/hi_IN.dic", "dictionaries/hi_IN.aff",
//BUG! "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu_HU.dic", "dictionaries/hu_HU.aff",
//BUG! "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
"hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi", "dictionaries/hu.dic", "dictionaries/hu.aff",
//BUG: has no encoding declaration "icelandic_dictionary-1.3-fx+tb+sm.xpi", "dictionaries/is.dic", "dictionaries/is.aff",
"kamus_pengecek_ejaan_bahasa_indonesia-1.1-fx+tb.xpi", "dictionaries/id.dic", "dictionaries/id.aff",
//BUG! "kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
"kannada_spell_checker-2.0.1-tb+sm+fn+an+fx.xpi", "dictionaries/kn.dic", "dictionaries/kn.aff",
"kashubian_spell_checker_poland-0.9-sm+tb+fx.xpi", "dictionaries/Kaszebsczi.dic", "dictionaries/Kaszebsczi.aff",
"kiswahili_spell_checker-0.3-sb+tb+fn+fx+sm.xpi", "dictionaries/sw_TZ.dic", "dictionaries/sw_TZ.aff",
"kurdish_spell_checker-0.96-fx+tb+sm.xpi", "dictionaries/ku-TR.dic", "dictionaries/ku-TR.aff",
@@ -113,8 +113,8 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"lithuanian_spelling_check_dictionary-1.3-fx+tb+sm+fn.xpi", "dictionaries/lt.dic", "dictionaries/lt.aff",
"litreoir_gaelspell_do_mhozilla-4.7-tb+fx+sm+fn.xpi", "dictionaries/ga.dic", "dictionaries/ga.aff",
"litreoir_na_liongailise-0.03-fx+sm+tb.xpi", "dictionaries/ln-CD.dic", "dictionaries/ln-CD.aff",
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
//BUG! "macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
"macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Cyrl.dic", "dictionaries/mk-MK-Cyrl.aff",
"macedonian_mk_mk_spellchecker-1.2-fn+tb+fx+sm+sb.xpi", "dictionaries/mk-MK-Latn.dic", "dictionaries/mk-MK-Latn.aff",
"malagasy_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/mg_MG.dic", "dictionaries/mg_MG.aff",
"marathi_dictionary-9.3-sm+tb+sb+fx.xpi", "dictionaries/mr-IN.dic", "dictionaries/mr-IN.aff",
"ndebele_south_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/nr-ZA.dic", "dictionaries/nr-ZA.aff",
@@ -125,8 +125,8 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"oriya_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/or-IN.dic", "dictionaries/or-IN.aff",
"polski_slownik_poprawnej_pisowni-1.0.20110621-fx+tb+sm.xpi", "dictionaries/pl.dic", "dictionaries/pl.aff",
"punjabi_spell_checker-0.3-fx+tb+sm+sb+fn.xpi", "dictionaries/pa-IN.dic", "dictionaries/pa-IN.aff",
//BUG! "romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
//BUG! "russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
"romanian_spellchecking_dictionary-1.14-sm+tb+fx.xpi", "dictionaries/ro_RO-ante1993.dic", "dictionaries/ro_RO-ante1993.aff",
"russian_hunspell_dictionary-1.0.20131101-tb+sm+fn+fx.xpi", "dictionaries/ru_RU.dic", "dictionaries/ru_RU.aff",
"sanskrit_spell_checker-1.1-fx+tb+sm+sb+fn.xpi", "dictionaries/sa_IN.dic", "dictionaries/sa_IN.aff",
"scottish_gaelic_spell_checker-2.7-tb+fx+sm.xpi", "dictionaries/gd-GB.dic", "dictionaries/gd-GB.aff",
"serbian_dictionary-0.18-fx+tb+sm.xpi", "dictionaries/sr-RS-Cyrl.dic", "dictionaries/sr-RS-Cyrl.aff",
@@ -146,22 +146,22 @@ public class TestAllDictionaries2 extends LuceneTestCase {
"telugu_spell_checker-0.3-tb+fx+sm.xpi", "dictionaries/te_IN.dic", "dictionaries/te_IN.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi-x-Tai Tokerau.dic", "dictionaries/mi-x-Tai Tokerau.aff",
"te_papakupu_m__ori-0.9.9.20080630-fx+tb.xpi", "dictionaries/mi.dic", "dictionaries/mi.aff",
//BUG! "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
//BUG: broken file (hunspell refuses to load, too) "thamizha_solthiruthitamil_spellchecker-0.8-fx+tb.xpi", "dictionaries/ta_IN.dic", "dictionaries/ta_IN.aff",
"tsonga_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/ts-ZA.dic", "dictionaries/ts-ZA.aff",
"tswana_spell_checker-20110323-tb+sm+fx+fn.xpi", "dictionaries/tn-ZA.dic", "dictionaries/tn-ZA.aff",
"turkce_yazm_denetimi-3.5-sm+tb+fx.xpi", "dictionaries/tr.dic", "dictionaries/tr.aff",
//BUG! "turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
"turkmen_spell_checker_dictionary-0.1.6-tb+fx+sm.xpi", "dictionaries/tk_TM.dic", "dictionaries/tk_TM.aff",
"ukrainian_dictionary-1.7.0-sm+an+fx+fn+tb.xpi", "dictionaries/uk-UA.dic", "dictionaries/uk-UA.aff",
"united_states_english_spellchecker-7.0.1-sm+tb+fx+an.xpi", "dictionaries/en-US.dic", "dictionaries/en-US.aff",
"upper_sorbian_spelling_dictionary-0.0.20060327.3-tb+fx+sm.xpi", "dictionaries/hsb.dic", "dictionaries/hsb.aff",
//BUG! "urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
"urdu_dictionary-0.64-fx+tb+sm+sb.xpi", "dictionaries/ur.dic", "dictionaries/ur.aff",
"uzbek_spell_checker-0.3-fn+tb+fx+sm+sb.xpi", "dictionaries/uz.dic", "dictionaries/uz.aff",
"valencian_catalan_dictionary-2.5.0-tb+fn+sm+fx.xpi", "dictionaries/ca-ES-valencia.dic", "dictionaries/ca-ES-valencia.aff",
"venda_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/ve-ZA.dic", "dictionaries/ve-ZA.aff",
"verificador_ortografico_para_portugues_do_brasil-2.3-3.2b1-tb+sm+fn+fx.xpi", "dictionaries/pt_BR.dic", "dictionaries/pt_BR.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauCu.dic", "dictionaries/vi-DauCu.aff",
"vietnamese_dictionary-2.1.0.159-an+sm+tb+fx+fn.xpi", "dictionaries/vi-DauMoi.dic", "dictionaries/vi-DauMoi.aff",
//BUG! "woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
"woordenboek_nederlands-3.1.1-sm+tb+fx+fn.xpi", "dictionaries/nl.dic", "dictionaries/nl.aff",
"xhosa_spell_checker-20110323-tb+fn+fx+sm.xpi", "dictionaries/xh-ZA.dic", "dictionaries/xh-ZA.aff",
"xuxen-4.0.1-fx+tb+sm.xpi", "dictionaries/eu.dic", "dictionaries/eu.aff",
"yiddish_spell_checker_yivo-0.0.3-sm+fn+fx+tb.xpi", "dictionaries/yi.dic", "dictionaries/yi.aff",
@@ -196,7 +196,7 @@ public class TestAllDictionaries2 extends LuceneTestCase {
}

public void testOneDictionary() throws Exception {
String toTest = "hausa_spelling_dictionary-0.2-tb+fx.xpi";
String toTest = "hungarian_dictionary-1.6.1.1-fx+tb+sm+fn.xpi";
for (int i = 0; i < tests.length; i++) {
if (tests[i].equals(toTest)) {
File f = new File(DICTIONARY_HOME, tests[i]);
@@ -210,7 +210,7 @@ public class TestAllDictionaries2 extends LuceneTestCase {

try (InputStream dictionary = zip.getInputStream(dicEntry);
InputStream affix = zip.getInputStream(affEntry)) {
new Dictionary(affix, dictionary);
new Dictionary(affix, dictionary);
}
}
}
@@ -17,6 +17,7 @@ package org.apache.lucene.analysis.hunspell;
* limitations under the License.
*/

import java.io.ByteArrayInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
@@ -24,6 +25,7 @@ import java.text.ParseException;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.fst.Builder;
@@ -77,6 +79,40 @@ public class TestDictionary extends LuceneTestCase {
affixStream.close();
dictStream.close();
}

public void testCompressedBeforeSetDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed-before-set.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");

Dictionary dictionary = new Dictionary(affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char flags[] = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);

affixStream.close();
dictStream.close();
}

public void testCompressedEmptyAliasDictionary() throws Exception {
InputStream affixStream = getClass().getResourceAsStream("compressed-empty-alias.aff");
InputStream dictStream = getClass().getResourceAsStream("compressed.dic");

Dictionary dictionary = new Dictionary(affixStream, dictStream);
assertEquals(3, dictionary.lookupSuffix(new char[]{'e'}, 0, 1).length);
assertEquals(1, dictionary.lookupPrefix(new char[]{'s'}, 0, 1).length);
IntsRef ordList = dictionary.lookupWord(new char[]{'o', 'l', 'r'}, 0, 3);
BytesRef ref = new BytesRef();
dictionary.flagLookup.get(ordList.ints[0], ref);
char flags[] = Dictionary.decodeFlags(ref);
assertEquals(1, flags.length);

affixStream.close();
dictStream.close();
}

// malformed rule causes ParseException
public void testInvalidData() throws Exception {
@@ -87,7 +123,7 @@
new Dictionary(affixStream, dictStream);
fail("didn't get expected exception");
} catch (ParseException expected) {
assertEquals("The affix file contains a rule with less than five elements", expected.getMessage());
assertTrue(expected.getMessage().startsWith("The affix file contains a rule with less than four elements"));
assertEquals(24, expected.getErrorOffset());
}

@@ -178,4 +214,16 @@
Dictionary.applyMappings(fst, sb);
assertEquals("ghghghde", sb.toString());
}

public void testSetWithCrazyWhitespaceAndBOMs() throws Exception {
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("SET\t UTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\n".getBytes(IOUtils.CHARSET_UTF_8))));
assertEquals("UTF-8", Dictionary.getDictionaryEncoding(new ByteArrayInputStream("\uFEFFSET\tUTF-8\r\n".getBytes(IOUtils.CHARSET_UTF_8))));
}

public void testFlagWithCrazyWhitespace() throws Exception {
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG\tUTF-8"));
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG UTF-8"));
}
}
@@ -0,0 +1,36 @@
package org.apache.lucene.analysis.hunspell;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.junit.BeforeClass;

public class TestEscaped extends StemmerTestBase {
@BeforeClass
public static void beforeClass() throws Exception {
init("escaped.aff", "escaped.dic");
}

public void testStemming() {
assertStemsTo("works", "work");
assertStemsTo("work", "work");
assertStemsTo("R2/D2", "R2/D2");
assertStemsTo("R2/D2s", "R2/D2");
assertStemsTo("N/A", "N/A");
assertStemsTo("N/As");
}
}
@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
import org.junit.BeforeClass;

@@ -39,9 +40,13 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {

@BeforeClass
public static void beforeClass() throws Exception {
try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
// no multiple try-with to workaround bogus VerifyError
InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
try {
dictionary = new Dictionary(affixStream, dictStream);
} finally {
IOUtils.closeWhileHandlingException(affixStream, dictStream);
}
}

@@ -97,9 +102,13 @@ public class TestHunspellStemFilter extends BaseTokenStreamTestCase {

public void testIgnoreCaseNoSideEffects() throws Exception {
final Dictionary d;
try (InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic")) {
// no multiple try-with to workaround bogus VerifyError
InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
try {
d = new Dictionary(affixStream, Collections.singletonList(dictStream), true);
} finally {
IOUtils.closeWhileHandlingException(affixStream, dictStream);
}
Analyzer a = new Analyzer() {
@Override
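The test above loads a Hunspell Dictionary from affix/dictionary streams and wraps the token stream with HunspellStemFilter. For context, a hedged sketch of how that pair is typically wired into an Analyzer; the MockTokenizer choice and the helper method are illustrative assumptions, not code from this commit:

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;

// Builds an Analyzer whose token stream is stemmed with the given Hunspell dictionary.
static Analyzer hunspellAnalyzer(final Dictionary dictionary) {
  return new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
      Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, dictionary));
    }
  };
}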
@@ -0,0 +1,42 @@
package org.apache.lucene.analysis.hunspell;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.junit.BeforeClass;

public class TestOptionalCondition extends StemmerTestBase {
@BeforeClass
public static void beforeClass() throws Exception {
init("optional-condition.aff", "condition.dic");
}

public void testStemming() {
assertStemsTo("hello", "hello");
assertStemsTo("try", "try");
assertStemsTo("tried", "try");
assertStemsTo("work", "work");
assertStemsTo("worked", "work");
assertStemsTo("rework", "work");
assertStemsTo("reworked", "work");
assertStemsTo("retried");
assertStemsTo("workied");
assertStemsTo("tryed");
assertStemsTo("tryied");
assertStemsTo("helloed");
}
}
@@ -19,6 +19,6 @@ SFX E 0 d o
PFX B Y 1
PFX B 0 s o

#wrong rule (only 4 elements)
#wrong rule (only 3 elements)
PFX A0 Y 1
PFX A0 0 a
PFX A0 0
@@ -0,0 +1,29 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ

FLAG long

AF 5
AF AA
AF BB
AF CC
AF DD
AF EE

SFX AA Y 3
SFX AA 0 e n
SFX AA 0 e t
SFX AA 0 e h

SFX CC Y 2
SFX CC 0 d/3 c
SFX CC 0 c b

SFX DD Y 1
SFX DD 0 s o

SFX EE Y 1
SFX EE 0 d o

PFX BB Y 1
PFX BB 0 s o
@@ -0,0 +1,30 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ

FLAG long

AF 6
AF AA
AF BB
AF CC
AF DD
AF EE
AF

SFX AA Y 3
SFX AA 0 e n
SFX AA 0 e t
SFX AA 0 e h

SFX CC Y 2
SFX CC 0 d/3 c
SFX CC 0 c b

SFX DD Y 1
SFX DD 0 s o

SFX EE Y 1
SFX EE 0 d o

PFX BB Y 1
PFX BB 0 s o
@@ -1,8 +1,3 @@
SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ

FLAG long

AF 5
AF AA
AF BB
@@ -10,6 +5,11 @@ AF CC
AF DD
AF EE

SET UTF-8
TRY abcdefghijklmopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ

FLAG long

SFX AA Y 3
SFX AA 0 e n
SFX AA 0 e t
@@ -0,0 +1,4 @@
SET UTF-8

SFX A Y 1
SFX A 0 s . +PLUR
@@ -0,0 +1,4 @@
3
work/A
R2\/D2/A
N\/A
@@ -0,0 +1,14 @@
SET UTF-8
TRY esianrtolcdugmphbyfvkwzESIANRTOLCDUGMPHBYFVKWZ’

REP 2
REP f ph
REP ph f

# has no condition
PFX A Y 1
PFX A 0 re

SFX B Y 2
SFX B 0 ed [^y]
SFX B y ied y
@@ -19,7 +19,7 @@ package org.apache.lucene.search;

import java.util.Collection;

/** Used by {@link BulkScorers} that need to pass a {@link
/** Used by {@link BulkScorer}s that need to pass a {@link
* Scorer} to {@link Collector#setScorer}. */
final class FakeScorer extends Scorer {
float score;
@@ -26,12 +26,13 @@ import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Bits.MatchNoBits;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
@@ -121,8 +122,11 @@ public class TestReuseDocsEnum extends LuceneTestCase {
public void testReuseDocsEnumDifferentReader() throws IOException {
Directory dir = newDirectory();
Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setCodec(cp));
newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodec(cp));
int numdocs = atLeast(20);
createRandomIndex(numdocs, writer, random());
writer.commit();
@@ -139,8 +139,11 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
mp.setUseCompoundFile(false);
mp.setNoCFSRatio(1.0);
mp.setMaxCFSSegmentSizeMB(Double.POSITIVE_INFINITY);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

// TODO: remove randomness
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))
IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
.setMergePolicy(mp);
conf.setCodec(Codec.forName("Lucene40"));
IndexWriter writer = new IndexWriter(dir, conf);
@@ -30,6 +30,7 @@ import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/**
*
@@ -41,8 +42,11 @@ public class TestCustomNorms extends LuceneTestCase {
public void testFloatNorms() throws IOException {

Directory dir = newDirectory();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random()));
analyzer);
Similarity provider = new MySimProvider();
config.setSimilarity(provider);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
@@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Random;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
@@ -63,9 +62,12 @@ public class TestDuelingCodecs extends LuceneTestCase {
long seed = random().nextLong();

// must use same seed because of random payloads, etc
Analyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
Analyzer rightAnalyzer = new MockAnalyzer(new Random(seed));

int maxTermLength = TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH);
MockAnalyzer leftAnalyzer = new MockAnalyzer(new Random(seed));
leftAnalyzer.setMaxTokenLength(maxTermLength);
MockAnalyzer rightAnalyzer = new MockAnalyzer(new Random(seed));
rightAnalyzer.setMaxTokenLength(maxTermLength);

// but these can be different
// TODO: this turns this into a really big test of Multi*, is that what we want?
IndexWriterConfig leftConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, leftAnalyzer);
@@ -29,6 +29,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockDirectoryWrapper;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;

@@ -64,8 +65,11 @@ public class TestFlushByRamOrCountsPolicy extends LuceneTestCase {
AtomicInteger numDocs = new AtomicInteger(numDocumentsToIndex);
Directory dir = newDirectory();
MockDefaultFlushPolicy flushPolicy = new MockDefaultFlushPolicy();
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setFlushPolicy(flushPolicy);
analyzer).setFlushPolicy(flushPolicy);
final int numDWPT = 1 + atLeast(2);
DocumentsWriterPerThreadPool threadPool = new ThreadAffinityDocumentsWriterThreadPool(
numDWPT);
@@ -54,7 +54,10 @@ public class TestForceMergeForever extends LuceneTestCase {

public void test() throws Exception {
final Directory d = newDirectory();
final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

final MyIndexWriter w = new MyIndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

// Try to make an index that requires merging:
w.getConfig().setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 11));
@@ -51,7 +51,9 @@ public class TestIndexWriterOutOfFileDescriptors extends LuceneTestCase {
System.out.println("TEST: iter=" + iter);
}
try {
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

if (VERBOSE) {
// Do this ourselves instead of relying on LTC so
@@ -548,7 +548,10 @@ public class TestIndexWriterWithThreads extends LuceneTestCase {
final int threadCount = TestUtil.nextInt(random(), 2, 6);

final AtomicReference<IndexWriter> writerRef = new AtomicReference<IndexWriter>();
writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()))));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

writerRef.set(new IndexWriter(d, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)));
final LineFileDocs docs = new LineFileDocs(random());
final Thread[] threads = new Thread[threadCount];
final int iters = atLeast(100);
@@ -75,7 +75,10 @@ public class TestNorms extends LuceneTestCase {
// LUCENE-1260
public void testCustomEncoder() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

IndexWriterConfig config = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
config.setSimilarity(new CustomNormEncodingSimilarity());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
Document doc = new Document();
@@ -46,7 +46,10 @@ public class TestRollingUpdates extends LuceneTestCase {
Codec.setDefault(TestUtil.alwaysPostingsFormat(new MemoryPostingsFormat(random().nextBoolean(), random.nextFloat())));
}

final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
final int SIZE = atLeast(20);
int id = 0;
IndexReader r = null;
@@ -44,7 +44,9 @@ public class TestTermsEnum extends LuceneTestCase {
Random random = new Random(random().nextLong());
final LineFileDocs docs = new LineFileDocs(random, true);
final Directory d = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), d);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final RandomIndexWriter w = new RandomIndexWriter(random(), d, analyzer);
final int numDocs = atLeast(10);
for(int docCount=0;docCount<numDocs;docCount++) {
w.addDocument(docs.nextDoc());
@@ -24,8 +24,10 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.CountDownLatch;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@@ -36,13 +38,14 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;

public class TestSameScoresWithThreads extends LuceneTestCase {

public void test() throws Exception {
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
final RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
LineFileDocs docs = new LineFileDocs(random());
int charsToIndex = atLeast(100000);
int charsIndexed = 0;
@@ -38,14 +38,15 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;

public class TestNRTCachingDirectory extends LuceneTestCase {

public void testNRTAndCommit() throws Exception {
Directory dir = newDirectory();
NRTCachingDirectory cachedDir = new NRTCachingDirectory(dir, 2.0, 25.0);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
RandomIndexWriter w = new RandomIndexWriter(random(), cachedDir, conf);
final LineFileDocs docs = new LineFileDocs(random(), true);
final int numDocs = TestUtil.nextInt(random(), 100, 400);
@@ -292,7 +292,10 @@ public class TestFSTs extends LuceneTestCase {

final LineFileDocs docs = new LineFileDocs(random(), true);
final int RUN_TIME_MSEC = atLeast(500);
final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

final IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(-1).setRAMBufferSizeMB(64);
final File tempDir = TestUtil.getTempDir("fstlines");
final Directory dir = newFSDirectory(tempDir);
final IndexWriter writer = new IndexWriter(dir, conf);
@@ -128,7 +128,7 @@ public abstract class AbstractAllGroupHeadsCollector<GH extends AbstractAllGroup

@Override
public boolean acceptsDocsOutOfOrder() {
return true;
return false;
}

/**
@@ -380,10 +380,7 @@ public class AllGroupHeadsCollectorTest extends LuceneTestCase {
System.out.println("\n===================================================================================");
}

assertEquals(expectedGroupHeads.length, actualGroupHeads.length);
for (int i = 0; i < expectedGroupHeads.length; i++) {
assertEquals(expectedGroupHeads[i], actualGroupHeads[i]);
}
assertArrayEquals(expectedGroupHeads, actualGroupHeads);
}
} finally {
QueryUtils.purgeFieldCache(r);
@@ -436,6 +436,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
for (int i = 0; i < numDocs; i++) {
Directory dir = newDirectory();
MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
mockAnalyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(random(), TEST_VERSION_CURRENT, mockAnalyzer));
Document nextDoc = lineFileDocs.nextDoc();
Document doc = new Document();
@@ -222,24 +222,14 @@ public final class TermsFilter extends Filter {
}

TermsFilter test = (TermsFilter) obj;
if (test.hashCode == hashCode && this.termsAndFields.length == test.termsAndFields.length) {
// first check the fields before even comparing the bytes
for (int i = 0; i < termsAndFields.length; i++) {
TermsAndField current = termsAndFields[i];
if (!current.equals(test.termsAndFields[i])) {
return false;
}
// first check the fields before even comparing the bytes
if (test.hashCode == hashCode && Arrays.equals(termsAndFields, test.termsAndFields)) {
int lastOffset = termsAndFields[termsAndFields.length - 1].end;
// compare offsets since we sort they must be identical
if (ArrayUtil.equals(offsets, 0, test.offsets, 0, lastOffset + 1)) {
// straight byte comparison since we sort they must be identical
return ArrayUtil.equals(termsBytes, 0, test.termsBytes, 0, offsets[lastOffset]);
}
// straight byte comparison since we sort they must be identical
int end = offsets[termsAndFields.length];
byte[] left = this.termsBytes;
byte[] right = test.termsBytes;
for(int i=0;i < end;i++) {
if (left[i] != right[i]) {
return false;
}
}
return true;
}
return false;
}
@@ -17,19 +17,29 @@ package org.apache.lucene.queries;
* limitations under the License.
*/

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -45,19 +55,13 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

public class CommonTermsQueryTest extends LuceneTestCase {

public void testBasics() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@@ -186,7 +190,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {

public void testMinShouldMatch() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@@ -344,7 +350,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {
@Test
public void testExtend() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
String[] docs = new String[] {"this is the end of the world right",
"is this it or maybe not",
"this is the end of the universe as we know it",
@@ -397,7 +405,9 @@ public class CommonTermsQueryTest extends LuceneTestCase {

public void testRandomIndex() throws IOException {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
RandomIndexWriter w = new RandomIndexWriter(random(), dir, analyzer);
createRandomIndex(atLeast(50), w, random().nextLong());
DirectoryReader reader = w.getReader();
AtomicReader wrapper = SlowCompositeReaderWrapper.wrap(reader);
@@ -50,7 +50,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TestUtil;

public class TermsFilterTest extends LuceneTestCase {

@@ -297,7 +296,15 @@ public class TermsFilterTest extends LuceneTestCase {
}
}
}

public void testSingleFieldEquals() {
// Two terms with the same hash code
assertEquals("AaAaBB".hashCode(), "BBBBBB".hashCode());
TermsFilter left = termsFilter(true, new Term("id", "AaAaAa"), new Term("id", "AaAaBB"));
TermsFilter right = termsFilter(true, new Term("id", "AaAaAa"), new Term("id", "BBBBBB"));
assertFalse(left.equals(right));
}

public void testNoTerms() {
List<Term> emptyTerms = Collections.emptyList();
List<BytesRef> emptyBytesRef = Collections.emptyList();

@@ -1379,7 +1379,9 @@ public abstract class BasePostingsFormatTestCase extends LuceneTestCase {
// during flush/merge
public void testInvertedWrite() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

// Must be concurrent because thread(s) can be merging
// while up to one thread flushes, and each of those

@@ -0,0 +1,110 @@
package org.apache.lucene.search;

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Random;
import java.util.WeakHashMap;

import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.util.VirtualMethod;

/** A crazy {@link BulkScorer} that wraps a {@link Scorer}
* but shuffles the order of the collected documents. */
public class AssertingBulkOutOfOrderScorer extends BulkScorer {

final Random random;
final Scorer scorer;

public AssertingBulkOutOfOrderScorer(Random random, Scorer scorer) {
this.random = random;
this.scorer = scorer;
}

private void shuffle(int[] docIDs, float[] scores, int[] freqs, int size) {
for (int i = size - 1; i > 0; --i) {
final int other = random.nextInt(i + 1);

final int tmpDoc = docIDs[i];
docIDs[i] = docIDs[other];
docIDs[other] = tmpDoc;

final float tmpScore = scores[i];
scores[i] = scores[other];
scores[other] = tmpScore;

final int tmpFreq = freqs[i];
freqs[i] = freqs[other];
freqs[other] = tmpFreq;
}
}

private static void flush(int[] docIDs, float[] scores, int[] freqs, int size,
FakeScorer scorer, Collector collector) throws IOException {
for (int i = 0; i < size; ++i) {
scorer.doc = docIDs[i];
scorer.freq = freqs[i];
scorer.score = scores[i];
collector.collect(scorer.doc);
}
}

@Override
public boolean score(Collector collector, int max) throws IOException {
if (scorer.docID() == -1) {
scorer.nextDoc();
}

FakeScorer fake = new FakeScorer();
collector.setScorer(fake);

final int bufferSize = 1 + random.nextInt(100);
final int[] docIDs = new int[bufferSize];
final float[] scores = new float[bufferSize];
final int[] freqs = new int[bufferSize];

int buffered = 0;
int doc = scorer.docID();
while (doc < max) {
docIDs[buffered] = doc;
scores[buffered] = scorer.score();
freqs[buffered] = scorer.freq();

if (++buffered == bufferSize) {
shuffle(docIDs, scores, freqs, buffered);
flush(docIDs, scores, freqs, buffered, fake, collector);
buffered = 0;
}
doc = scorer.nextDoc();
}

shuffle(docIDs, scores, freqs, buffered);
flush(docIDs, scores, freqs, buffered, fake, collector);

return doc != Scorer.NO_MORE_DOCS;
}

@Override
public String toString() {
return "AssertingBulkOutOfOrderScorer(" + scorer + ")";
}
}

@@ -34,18 +34,11 @@ public class AssertingBulkScorer extends BulkScorer {
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR = new VirtualMethod<BulkScorer>(BulkScorer.class, "score", Collector.class);
private static final VirtualMethod<BulkScorer> SCORE_COLLECTOR_RANGE = new VirtualMethod<BulkScorer>(BulkScorer.class, "score", Collector.class, int.class);

// we need to track scorers using a weak hash map because otherwise we
// could loose references because of eg.
// AssertingScorer.score(Collector) which needs to delegate to work correctly
private static Map<BulkScorer, WeakReference<AssertingBulkScorer>> ASSERTING_INSTANCES = Collections.synchronizedMap(new WeakHashMap<BulkScorer, WeakReference<AssertingBulkScorer>>());

public static BulkScorer wrap(Random random, BulkScorer other) {
if (other == null || other instanceof AssertingBulkScorer) {
return other;
}
final AssertingBulkScorer assertScorer = new AssertingBulkScorer(random, other);
ASSERTING_INSTANCES.put(other, new WeakReference<AssertingBulkScorer>(assertScorer));
return assertScorer;
return new AssertingBulkScorer(random, other);
}

public static boolean shouldWrap(BulkScorer inScorer) {

@@ -87,4 +80,5 @@ public class AssertingBulkScorer extends BulkScorer {
public String toString() {
return "AssertingBulkScorer(" + in + ")";
}

}

@@ -29,12 +29,14 @@ class AssertingWeight extends Weight {
return other instanceof AssertingWeight ? other : new AssertingWeight(random, other);
}

final boolean scoresDocsOutOfOrder;
final Random random;
final Weight in;

AssertingWeight(Random random, Weight in) {
this.random = random;
this.in = in;
scoresDocsOutOfOrder = in.scoresDocsOutOfOrder() || random.nextBoolean();
}

@Override

@@ -73,8 +75,21 @@ class AssertingWeight extends Weight {
if (inScorer == null) {
return null;
}

if (AssertingBulkScorer.shouldWrap(inScorer)) {
// The incoming scorer already has a specialized
// implementation for BulkScorer, so we should use it:
return AssertingBulkScorer.wrap(new Random(random.nextLong()), inScorer);
} else if (scoreDocsInOrder == false && random.nextBoolean()) {
// The caller claims it can handle out-of-order
// docs; let's confirm that by pulling docs and
// randomly shuffling them before collection:
//Scorer scorer = in.scorer(context, acceptDocs);
Scorer scorer = scorer(context, acceptDocs);

// Scorer should not be null if bulkScorer wasn't:
assert scorer != null;
return new AssertingBulkOutOfOrderScorer(new Random(random.nextLong()), scorer);
} else {
// Let super wrap this.scorer instead, so we use
// AssertingScorer:

@@ -84,8 +99,7 @@ class AssertingWeight extends Weight {

@Override
public boolean scoresDocsOutOfOrder() {
return in.scoresDocsOutOfOrder();
return scoresDocsOutOfOrder;
}

}

@@ -329,7 +329,7 @@ public class QueryUtils {

@Override
public boolean acceptsDocsOutOfOrder() {
return true;
return false;
}
});

@@ -449,7 +449,9 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
myNodeID = nodeID;
dir = newFSDirectory(TestUtil.getTempDir("ShardSearchingTestBase"));
// TODO: set warmer
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
MockAnalyzer analyzer = new MockAnalyzer(random());
analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));
IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
if (VERBOSE) {
iwc.setInfoStream(new PrintStreamInfoStream(System.out));

@@ -138,6 +138,12 @@ Bug Fixes
* SOLR-5818: distrib search with custom comparator does not quite work correctly
  (Ryan Ernst)

* SOLR-5834: Overseer threads are only being interrupted and not closed.
  (hossman, Mark Miller)

* SOLR-5839: ZookeeperInfoServlet does not trim path properly.
  (Furkan KAMACI via Mark Miller)

Optimizations
----------------------
* SOLR-1880: Distributed Search skips GET_FIELDS stage if EXECUTE_QUERY

@@ -181,6 +187,13 @@ Other Changes
* SOLR-5796: Make how long we are willing to wait for a core to see the ZK
  advertised leader in it's local state configurable.
  (Timothy Potter via Mark Miller)

* SOLR-5825: Separate http request creating and execution in SolrJ
  (Steven Bower via Erick Erickson)

* SOLR-5837: Add hashCode/equals to SolrDocument, SolrInputDocument
  and SolrInputField for testing purposes. (Varun Thacker, Noble Paul,
  Mark Miller)

==================  4.7.0 ==================

@@ -81,8 +81,8 @@ public class Overseer {
//Internal queue where overseer stores events that have not yet been published into cloudstate
//If Overseer dies while extracting the main queue a new overseer will start from this queue
private final DistributedQueue workQueue;
private volatile boolean isClosed;
private Map clusterProps;
private boolean isClosed = false;

public ClusterStateUpdater(final ZkStateReader reader, final String myId) {
this.zkClient = reader.getZkClient();

@@ -1030,20 +1030,22 @@ public class Overseer {

class OverseerThread extends Thread implements ClosableThread {

private volatile boolean isClosed;
protected volatile boolean isClosed;
private ClosableThread thread;

public OverseerThread(ThreadGroup tg,
ClusterStateUpdater clusterStateUpdater) {
super(tg, clusterStateUpdater);
public OverseerThread(ThreadGroup tg, ClosableThread thread) {
super(tg, (Runnable) thread);
this.thread = thread;
}

public OverseerThread(ThreadGroup ccTg,
OverseerCollectionProcessor overseerCollectionProcessor, String string) {
super(ccTg, overseerCollectionProcessor, string);
public OverseerThread(ThreadGroup ccTg, ClosableThread thread, String name) {
super(ccTg, (Runnable) thread, name);
this.thread = thread;
}

@Override
public void close() {
thread.close();
this.isClosed = true;
}

@@ -1084,8 +1086,7 @@ public class Overseer {
ThreadGroup ccTg = new ThreadGroup("Overseer collection creation process.");

ocp = new OverseerCollectionProcessor(reader, id, shardHandler, adminPath);
ccThread = new OverseerThread(ccTg, ocp,
"Overseer-" + id);
ccThread = new OverseerThread(ccTg, ocp, "Overseer-" + id);
ccThread.setDaemon(true);

updaterThread.start();

@@ -195,7 +195,7 @@ public final class ZookeeperInfoServlet extends HttpServlet {
if (path == null) {
path = "/";
} else {
path.trim();
path = path.trim();
if (path.length() == 0) {
path = "/";
}

@@ -23,7 +23,9 @@ import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;

@@ -394,4 +396,106 @@ public class DocumentBuilderTest extends SolrTestCaseJ4 {
assertNull(h.validateUpdate(add(xml, new String[0])));
}

public void testSolrDocumentEquals() {

String randomString = TestUtil.randomSimpleString(random());

SolrDocument doc1 = new SolrDocument();
doc1.addField("foo", randomString);

SolrDocument doc2 = new SolrDocument();
doc2.addField("foo", randomString);

assertTrue(doc1.equals(doc2));

doc1.addField("foo", "bar");

assertFalse(doc1.equals(doc2));

doc1 = new SolrDocument();
doc1.addField("bar", randomString);

assertFalse(doc1.equals(doc2));

int randomInt = random().nextInt();
doc1 = new SolrDocument();
doc1.addField("foo", randomInt);
doc2 = new SolrDocument();
doc2.addField("foo", randomInt);

assertTrue(doc1.equals(doc2));

doc2 = new SolrDocument();
doc2.addField("bar", randomInt);

assertFalse(doc1.equals(doc2));

}

public void testSolrInputDocumentEquality() {

String randomString = TestUtil.randomSimpleString(random());

SolrInputDocument doc1 = new SolrInputDocument();
doc1.addField("foo", randomString);
SolrInputDocument doc2 = new SolrInputDocument();
doc2.addField("foo", randomString);

assertTrue(doc1.equals(doc2));

doc1.setDocumentBoost(1.1f);
assertFalse(doc1.equals(doc2));

doc2.setDocumentBoost(1.1f);
assertTrue(doc1.equals(doc2));

doc2.setDocumentBoost(20f);
assertFalse(doc1.equals(doc2));

doc1 = new SolrInputDocument();
doc1.addField("foo", randomString);
doc2 = new SolrInputDocument();
doc2.addField("foo", randomString);

SolrInputDocument childDoc = new SolrInputDocument();
childDoc.addField("foo", "bar");

doc1.addChildDocument(childDoc);
assertFalse(doc1.equals(doc2));

doc2.addChildDocument(childDoc);
assertTrue(doc1.equals(doc2));

SolrInputDocument childDoc1 = new SolrInputDocument();
childDoc.addField(TestUtil.randomSimpleString(random()), TestUtil.randomSimpleString(random()));
doc2.addChildDocument(childDoc1);
assertFalse(doc1.equals(doc2));

}

public void testSolrInputFieldEquality() {
String randomString = TestUtil.randomSimpleString(random(), 10, 20);

int val = random().nextInt();
SolrInputField sif1 = new SolrInputField(randomString);
sif1.setValue(val, 1.0f);
SolrInputField sif2 = new SolrInputField(randomString);
sif2.setValue(val, 1.0f);

assertTrue(sif1.equals(sif2));

sif1.setBoost(2.1f);
sif2.setBoost(2.1f);
assertTrue(sif1.equals(sif2));

sif2.setBoost(2.0f);
assertFalse(sif1.equals(sif2));

sif2.setName("foo");
assertFalse(sif1.equals(sif2));

}

}

@@ -199,8 +199,11 @@ public class HttpSolrServer extends SolrServer {
return request(request, responseParser);
}

public NamedList<Object> request(final SolrRequest request,
final ResponseParser processor) throws SolrServerException, IOException {
public NamedList<Object> request(final SolrRequest request, final ResponseParser processor) throws SolrServerException, IOException {
return executeMethod(createMethod(request),processor);
}

protected HttpRequestBase createMethod(final SolrRequest request) throws IOException, SolrServerException {
HttpRequestBase method = null;
InputStream is = null;
SolrParams params = request.getParams();

@@ -382,6 +385,10 @@ public class HttpSolrServer extends SolrServer {
throw new SolrServerException("error reading streams", ex);
}

return method;
}

protected NamedList<Object> executeMethod(HttpRequestBase method, final ResponseParser processor) throws SolrServerException {
// XXX client already has this set, is this needed?
method.getParams().setParameter(ClientPNames.HANDLE_REDIRECTS,
followRedirects);

@@ -213,7 +213,41 @@ public class SolrDocument implements Map<String,Object>, Iterable<Map.Entry<Stri
public Iterator<Entry<String, Object>> iterator() {
return _fields.entrySet().iterator();
}

/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof SolrDocument)) {
return false;
}

SolrDocument solrDocument = (SolrDocument) o;

if (!_fields.equals(solrDocument._fields)) {
return false;
}

return true;
}

/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public int hashCode() {
return _fields.hashCode();
}

//-----------------------------------------------------------------------------------------
// JSTL Helpers
//-----------------------------------------------------------------------------------------

@@ -275,7 +275,51 @@ public class SolrInputDocument implements Map<String,SolrInputField>, Iterable<S
public Collection<SolrInputField> values() {
return _fields.values();
}

/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof SolrInputDocument)) {
return false;
}

SolrInputDocument sdoc = (SolrInputDocument) o;

if (!_fields.equals(sdoc._fields)) {
return false;
}
if (Float.compare(sdoc._documentBoost, _documentBoost) != 0) {
return false;
}
if (_childDocuments != null ? !_childDocuments.equals(sdoc._childDocuments) : sdoc._childDocuments != null) {
return false;
}

return true;
}

/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public int hashCode() {
int result = _fields.hashCode();
result = 31 * result + (_documentBoost != +0.0f ? Float.floatToIntBits(_documentBoost) : 0);
result = 31 * result + (_childDocuments != null ? _childDocuments.hashCode() : 0);
return result;
}

public void addChildDocument(SolrInputDocument child) {
if (_childDocuments == null) {
_childDocuments = new ArrayList<SolrInputDocument>();

@@ -229,4 +229,50 @@ public class SolrInputField implements Iterable<Object>, Serializable
}
return clone;
}

/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof SolrInputField)) {
return false;
}

SolrInputField sif = (SolrInputField) o;

if (!name.equals(sif.name)) {
return false;
}

if (!value.equals(sif.value)) {
return false;
}

if (Float.compare(sif.boost, boost) != 0) {
return false;
}

return true;
}

/**
* This method is implemented for tests and should not be counted
* on in production code.
*
* @lucene.experimental
*/
@Override
public int hashCode() {
int result = name.hashCode();
result = 31 * result + value.hashCode();
result = 31 * result + (boost != +0.0f ? Float.floatToIntBits(boost) : 0);
return result;
}
}

@@ -521,7 +521,7 @@ public class JavaBinCodec {
public Object getValue() {
return value;
}

@Override
public String toString() {
return "MapEntry[" + key.toString() + ":" + value.toString() + "]";

@@ -530,7 +530,28 @@ public class JavaBinCodec {
@Override
public Object setValue(Object value) {
throw new UnsupportedOperationException();
}};
}

@Override
public int hashCode() {
int result = 31;
result *=31 + getKey().hashCode();
result *=31 + getValue().hashCode();
return result;
}

@Override
public boolean equals(Object obj) {
if(this == obj) {
return true;
}
if(!(obj instanceof Entry)) {
return false;
}
Map.Entry<Object, Object> entry = (Entry<Object, Object>) obj;
return (this.getKey().equals(entry.getKey()) && this.getValue().equals(entry.getValue()));
}
};
}

/**

Binary file not shown.
@@ -17,17 +17,37 @@ package org.apache.solr.common.util;
* limitations under the License.
*/

import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.solr.common.EnumFieldValue;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.junit.Test;

public class TestJavaBinCodec extends LuceneTestCase {

private static final String SOLRJ_JAVABIN_BACKCOMPAT_BIN = "/solrj/javabin_backcompat.bin";
private final String BIN_FILE_LOCATION = "./solr/solrj/src/test-files/solrj/javabin_backcompat.bin";

public void testStrings() throws Exception {
JavaBinCodec javabin = new JavaBinCodec();
for (int i = 0; i < 10000*RANDOM_MULTIPLIER; i++) {
for (int i = 0; i < 10000 * RANDOM_MULTIPLIER; i++) {
String s = TestUtil.randomUnicodeString(random());
ByteArrayOutputStream os = new ByteArrayOutputStream();
javabin.marshal(s, os);

@@ -36,4 +56,158 @@ public class TestJavaBinCodec extends LuceneTestCase {
assertEquals(s, o);
}
}

private List<Object> generateAllDataTypes() {
List<Object> types = new ArrayList<>();

types.add(null); //NULL
types.add(true);
types.add(false);
types.add((byte) 1);
types.add((short) 2);
types.add((double) 3);

types.add(-4);
types.add(4);
types.add(42);

types.add((long) -5);
types.add((long) 5);
types.add((long) 50);

types.add((float) 6);
types.add(new Date(0));

Map<Integer, Integer> map = new HashMap<>();
map.put(1, 2);
types.add(map);

SolrDocument doc = new SolrDocument();
doc.addField("foo", "bar");
types.add(doc);

SolrDocumentList solrDocs = new SolrDocumentList();
solrDocs.setMaxScore(1.0f);
solrDocs.setNumFound(1);
solrDocs.setStart(0);
solrDocs.add(0, doc);
types.add(solrDocs);

types.add(new byte[] {1,2,3,4,5});

// TODO?
// List<String> list = new ArrayList<String>();
// list.add("one");
// types.add(list.iterator());

types.add((byte) 15); //END

SolrInputDocument idoc = new SolrInputDocument();
idoc.addField("foo", "bar");
types.add(idoc);

SolrInputDocument parentDoc = new SolrInputDocument();
parentDoc.addField("foo", "bar");
SolrInputDocument childDoc = new SolrInputDocument();
childDoc.addField("foo", "bar");
parentDoc.addChildDocument(childDoc);
types.add(parentDoc);

types.add(new EnumFieldValue(1, "foo"));

types.add(map.entrySet().iterator().next()); //Map.Entry

types.add((byte) (1 << 5)); //TAG_AND_LEN

types.add("foo");
types.add(1);
types.add((long) 2);

SimpleOrderedMap simpleOrderedMap = new SimpleOrderedMap();
simpleOrderedMap.add("bar", "barbar");
types.add(simpleOrderedMap);

NamedList<String> nl = new NamedList<>();
nl.add("foo", "barbar");
types.add(nl);

return types;
}

@Test
public void testBackCompat() {
List iteratorAsList = null;
JavaBinCodec javabin = new JavaBinCodec(){
@Override
public List<Object> readIterator(DataInputInputStream fis) throws IOException {
return super.readIterator(fis);
}
};
try {
InputStream is = getClass().getResourceAsStream(SOLRJ_JAVABIN_BACKCOMPAT_BIN);
List<Object> unmarshaledObj = (List<Object>) javabin.unmarshal(is);
List<Object> matchObj = generateAllDataTypes();

assertEquals(unmarshaledObj.size(), matchObj.size());
for(int i=0; i < unmarshaledObj.size(); i++) {

if(unmarshaledObj.get(i) instanceof byte[] && matchObj.get(i) instanceof byte[]) {
byte[] b1 = (byte[]) unmarshaledObj.get(i);
byte[] b2 = (byte[]) matchObj.get(i);
assertTrue(Arrays.equals(b1, b2));

} else {
assertEquals(unmarshaledObj.get(i), matchObj.get(i));
}

}
} catch (IOException e) {
fail(e.getMessage());
}

}

@Test
public void testForwardCompat() {
JavaBinCodec javabin = new JavaBinCodec();
ByteArrayOutputStream os = new ByteArrayOutputStream();

Object data = generateAllDataTypes();
try {
javabin.marshal(data, os);
byte[] newFormatBytes = os.toByteArray();

InputStream is = getClass().getResourceAsStream(SOLRJ_JAVABIN_BACKCOMPAT_BIN);
byte[] currentFormatBytes = IOUtils.toByteArray(is);

for (int i = 1; i < currentFormatBytes.length; i++) {//ignore the first byte. It is version information
assertEquals(currentFormatBytes[i], newFormatBytes[i]);
}

} catch (IOException e) {
e.printStackTrace();
fail(e.getMessage());
}

}

public void genBinaryFile() throws IOException {
JavaBinCodec javabin = new JavaBinCodec();
ByteArrayOutputStream os = new ByteArrayOutputStream();

Object data = generateAllDataTypes();

javabin.marshal(data, os);
byte[] out = os.toByteArray();
FileOutputStream fs = new FileOutputStream(new File(BIN_FILE_LOCATION));
BufferedOutputStream bos = new BufferedOutputStream(fs);
bos.write(out);
bos.close();
}

public static void main(String[] args) throws IOException {
TestJavaBinCodec test = new TestJavaBinCodec();
test.genBinaryFile();
}

}