Made hole character in XAnalyzingSuggester part of the PostingsFormat
* Hole character can now change with new releases
* Fixed bug where the SEP_LABEL constant was used instead of the sepLabel instance variable
* Replaced if-statement with switch-statement
This commit is contained in:
parent
1e85e4dd26
commit
11de330246
|
@ -92,7 +92,7 @@ import java.util.*;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public class XAnalyzingSuggester extends Lookup {
|
||||
|
||||
|
||||
/**
|
||||
* FST<Weight,Surface>:
|
||||
* input is the analyzed form, with a null byte between terms
|
||||
|
@ -124,14 +124,14 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
private final boolean preserveSep;
|
||||
|
||||
/** Include this flag in the options parameter to {@link
|
||||
* #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int)} to always
|
||||
* #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to always
|
||||
* return the exact match first, regardless of score. This
|
||||
* has no performance impact but could result in
|
||||
* low-quality suggestions. */
|
||||
public static final int EXACT_FIRST = 1;
|
||||
|
||||
/** Include this flag in the options parameter to {@link
|
||||
* #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int)} to preserve
|
||||
* #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to preserve
|
||||
* token separators when matching. */
|
||||
public static final int PRESERVE_SEP = 2;
|
||||
|
||||
|
@ -163,6 +163,7 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
private final int sepLabel;
|
||||
private final int payloadSep;
|
||||
private final int endByte;
|
||||
private final int holeCharacter;
|
||||
|
||||
public static final int PAYLOAD_SEP = '\u001F';
|
||||
public static final int HOLE_CHARACTER = '\u001E';
|
||||
|
@ -171,21 +172,21 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
private boolean preservePositionIncrements;
|
||||
|
||||
/**
|
||||
* Calls {@link #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int)
|
||||
* Calls {@link #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)
|
||||
* AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST |
|
||||
* PRESERVE_SEP, 256, -1)}
|
||||
*/
|
||||
public XAnalyzingSuggester(Analyzer analyzer) {
|
||||
this(analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE);
|
||||
this(analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls {@link #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int)
|
||||
* Calls {@link #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)
|
||||
* AnalyzingSuggester(indexAnalyzer, queryAnalyzer, EXACT_FIRST |
|
||||
* PRESERVE_SEP, 256, -1)}
|
||||
*/
|
||||
public XAnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
|
||||
this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE);
|
||||
this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -206,7 +207,7 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
*/
|
||||
public XAnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
|
||||
boolean preservePositionIncrements, FST<Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput,
|
||||
int sepLabel, int payloadSep, int endByte) {
|
||||
int sepLabel, int payloadSep, int endByte, int holeCharacter) {
|
||||
// SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput
|
||||
this.indexAnalyzer = indexAnalyzer;
|
||||
this.queryAnalyzer = queryAnalyzer;
|
||||
|
@ -236,6 +237,7 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
this.sepLabel = sepLabel;
|
||||
this.payloadSep = payloadSep;
|
||||
this.endByte = endByte;
|
||||
this.holeCharacter = holeCharacter;
|
||||
}
|
||||
|
||||
/** Returns byte size of the underlying FST. */
|
||||
|
@ -682,10 +684,10 @@ public class XAnalyzingSuggester extends Lookup {
|
|||
|
||||
//System.out.println("lookup key=" + key + " num=" + num);
|
||||
for (int i = 0; i < key.length(); i++) {
|
||||
if (key.charAt(i) == HOLE_CHARACTER) {
|
||||
if (key.charAt(i) == holeCharacter) {
|
||||
throw new IllegalArgumentException("lookup key cannot contain HOLE character U+001E; this character is reserved");
|
||||
}
|
||||
if (key.charAt(i) == SEP_LABEL) {
|
||||
if (key.charAt(i) == sepLabel) {
|
||||
throw new IllegalArgumentException("lookup key cannot contain unit separator character U+001F; this character is reserved");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -122,7 +122,7 @@ public final class XFuzzySuggester extends XAnalyzingSuggester {
|
|||
*/
|
||||
public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) {
|
||||
this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS,
|
||||
DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE);
|
||||
DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER);
|
||||
|
||||
}
|
||||
|
||||
|
@ -154,8 +154,8 @@ public final class XFuzzySuggester extends XAnalyzingSuggester {
|
|||
public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
|
||||
int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware,
|
||||
FST<PairOutputs.Pair<Long, BytesRef>> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput,
|
||||
int sepLabel, int payloadSep, int endByte) {
|
||||
super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte);
|
||||
int sepLabel, int payloadSep, int endByte, int holeCharacter) {
|
||||
super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter);
|
||||
if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
|
||||
throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
|
||||
}
|
||||
|
|
|
@ -74,7 +74,7 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
|
|||
int options = preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0;
|
||||
// needs to be fixed in the suggester first before it can be supported
|
||||
//options |= exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0;
|
||||
prototype = new XAnalyzingSuggester(null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE);
|
||||
prototype = new XAnalyzingSuggester(null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -231,19 +231,23 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
|
|||
|
||||
// first version did not include these three fields, so fall back to old default (before the analyzingsuggester
|
||||
// was updated in Lucene, so we cannot use the suggester defaults)
|
||||
int sepLabel, payloadSep, endByte;
|
||||
if (version == CODEC_VERSION_START) {
|
||||
sepLabel = 0xFF;
|
||||
payloadSep = '\u001f';
|
||||
endByte = 0x0;
|
||||
} else {
|
||||
sepLabel = input.readVInt();
|
||||
endByte = input.readVInt();
|
||||
payloadSep = input.readVInt();
|
||||
int sepLabel, payloadSep, endByte, holeCharacter;
|
||||
switch (version) {
|
||||
case CODEC_VERSION_START:
|
||||
sepLabel = 0xFF;
|
||||
payloadSep = '\u001f';
|
||||
endByte = 0x0;
|
||||
holeCharacter = '\u001E';
|
||||
break;
|
||||
default:
|
||||
sepLabel = input.readVInt();
|
||||
endByte = input.readVInt();
|
||||
payloadSep = input.readVInt();
|
||||
holeCharacter = input.readVInt();
|
||||
}
|
||||
|
||||
AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions,
|
||||
hasPayloads, maxAnalyzedPathsForOneInput, fst, sepLabel, payloadSep, endByte);
|
||||
hasPayloads, maxAnalyzedPathsForOneInput, fst, sepLabel, payloadSep, endByte, holeCharacter);
|
||||
lookupMap.put(entry.getValue(), holder);
|
||||
}
|
||||
return new LookupFactory() {
|
||||
|
@ -262,13 +266,15 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
|
|||
suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(),
|
||||
suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(),
|
||||
analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
|
||||
analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte);
|
||||
analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
|
||||
analyzingSuggestHolder.holeCharacter);
|
||||
|
||||
} else {
|
||||
suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
|
||||
analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
|
||||
analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
|
||||
analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte);
|
||||
analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte,
|
||||
analyzingSuggestHolder.holeCharacter);
|
||||
}
|
||||
return suggester;
|
||||
}
|
||||
|
@ -316,13 +322,14 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
|
|||
final int sepLabel;
|
||||
final int payloadSep;
|
||||
final int endByte;
|
||||
final int holeCharacter;
|
||||
|
||||
public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions,
|
||||
boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst) {
|
||||
this(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE);
|
||||
this(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
|
||||
}
|
||||
|
||||
public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst, int sepLabel, int payloadSep, int endByte) {
|
||||
public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST<Pair<Long, BytesRef>> fst, int sepLabel, int payloadSep, int endByte, int holeCharacter) {
|
||||
this.preserveSep = preserveSep;
|
||||
this.preservePositionIncrements = preservePositionIncrements;
|
||||
this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm;
|
||||
|
@ -333,6 +340,7 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
|
|||
this.sepLabel = sepLabel;
|
||||
this.payloadSep = payloadSep;
|
||||
this.endByte = endByte;
|
||||
this.holeCharacter = holeCharacter;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -85,7 +85,7 @@ public class AnalyzingCompletionLookupProviderV1 extends CompletionLookupProvide
|
|||
// needs to be fixed in the suggester first before it can be supported
|
||||
//options |= exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0;
|
||||
prototype = new XAnalyzingSuggester(null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements,
|
||||
null, false, 1, SEP_LABEL, PAYLOAD_SEP, END_BYTE);
|
||||
null, false, 1, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -255,14 +255,14 @@ public class AnalyzingCompletionLookupProviderV1 extends CompletionLookupProvide
|
|||
suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(),
|
||||
suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), false,
|
||||
analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
|
||||
analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE);
|
||||
analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
|
||||
|
||||
} else {
|
||||
suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags,
|
||||
analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions,
|
||||
analyzingSuggestHolder.preservePositionIncrements,
|
||||
analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads,
|
||||
analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE);
|
||||
analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
|
||||
}
|
||||
return suggester;
|
||||
}
|
||||
|
|
|
@ -123,7 +123,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
|
|||
final int options = preserveSeparators ? AnalyzingSuggester.PRESERVE_SEP : 0;
|
||||
|
||||
XAnalyzingSuggester reference = new XAnalyzingSuggester(new StandardAnalyzer(TEST_VERSION_CURRENT), new StandardAnalyzer(
|
||||
TEST_VERSION_CURRENT), options, 256, -1, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE);
|
||||
TEST_VERSION_CURRENT), options, 256, -1, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER);
|
||||
LineFileDocs docs = new LineFileDocs(getRandom());
|
||||
int num = atLeast(150);
|
||||
final String[] titles = new String[num];
|
||||
|
|
Loading…
Reference in New Issue