From 11de330246643418cf57509f77543b21e862bed5 Mon Sep 17 00:00:00 2001 From: Alexander Reelsen Date: Wed, 27 Nov 2013 13:21:53 +0100 Subject: [PATCH] Made hole character in XAnalyzingSuggester part of Postingsformat * Hole character now can change with new releases * Fixed bug where the SEP_LABEL constant was used instead of the sepLabel instance variable * Replaced if- with switch-statement --- .../analyzing/XAnalyzingSuggester.java | 22 ++++++----- .../suggest/analyzing/XFuzzySuggester.java | 6 +-- .../AnalyzingCompletionLookupProvider.java | 38 +++++++++++-------- .../AnalyzingCompletionLookupProviderV1.java | 6 +-- .../CompletionPostingsFormatTest.java | 2 +- 5 files changed, 42 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java b/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java index 43805769d93..751412137c0 100644 --- a/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java +++ b/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java @@ -92,7 +92,7 @@ import java.util.*; * @lucene.experimental */ public class XAnalyzingSuggester extends Lookup { - + /** * FST: * input is the analyzed form, with a null byte between terms @@ -124,14 +124,14 @@ public class XAnalyzingSuggester extends Lookup { private final boolean preserveSep; /** Include this flag in the options parameter to {@link - * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int)} to always + * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to always * return the exact match first, regardless of score. This * has no performance impact but could result in * low-quality suggestions. 
*/ public static final int EXACT_FIRST = 1; /** Include this flag in the options parameter to {@link - * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int)} to preserve + * #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int)} to preserve * token separators when matching. */ public static final int PRESERVE_SEP = 2; @@ -163,6 +163,7 @@ public class XAnalyzingSuggester extends Lookup { private final int sepLabel; private final int payloadSep; private final int endByte; + private final int holeCharacter; public static final int PAYLOAD_SEP = '\u001F'; public static final int HOLE_CHARACTER = '\u001E'; @@ -171,21 +172,21 @@ public class XAnalyzingSuggester extends Lookup { private boolean preservePositionIncrements; /** - * Calls {@link #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int) + * Calls {@link #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int) * AnalyzingSuggester(analyzer, analyzer, EXACT_FIRST | * PRESERVE_SEP, 256, -1)} */ public XAnalyzingSuggester(Analyzer analyzer) { - this(analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE); + this(analyzer, analyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); } /** - * Calls {@link #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int) + * Calls {@link #XAnalyzingSuggester(Analyzer,Analyzer,int,int,int,boolean,FST,boolean,int,int,int,int,int) * AnalyzingSuggester(indexAnalyzer, queryAnalyzer, EXACT_FIRST | * PRESERVE_SEP, 256, -1)} */ public XAnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer) { - this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE); + this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, 
-1, true, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); } /** @@ -206,7 +207,7 @@ public class XAnalyzingSuggester extends Lookup { */ public XAnalyzingSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean preservePositionIncrements, FST> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput, - int sepLabel, int payloadSep, int endByte) { + int sepLabel, int payloadSep, int endByte, int holeCharacter) { // SIMON EDIT: I added fst, hasPayloads and maxAnalyzedPathsForOneInput this.indexAnalyzer = indexAnalyzer; this.queryAnalyzer = queryAnalyzer; @@ -236,6 +237,7 @@ public class XAnalyzingSuggester extends Lookup { this.sepLabel = sepLabel; this.payloadSep = payloadSep; this.endByte = endByte; + this.holeCharacter = holeCharacter; } /** Returns byte size of the underlying FST. */ @@ -682,10 +684,10 @@ public class XAnalyzingSuggester extends Lookup { //System.out.println("lookup key=" + key + " num=" + num); for (int i = 0; i < key.length(); i++) { - if (key.charAt(i) == HOLE_CHARACTER) { + if (key.charAt(i) == holeCharacter) { throw new IllegalArgumentException("lookup key cannot contain HOLE character U+001E; this character is reserved"); } - if (key.charAt(i) == SEP_LABEL) { + if (key.charAt(i) == sepLabel) { throw new IllegalArgumentException("lookup key cannot contain unit separator character U+001F; this character is reserved"); } } diff --git a/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java b/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java index d364d29fa12..e0945867145 100644 --- a/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java +++ b/src/main/java/org/apache/lucene/search/suggest/analyzing/XFuzzySuggester.java @@ -122,7 +122,7 @@ public final class XFuzzySuggester extends XAnalyzingSuggester { */ public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer 
queryAnalyzer) { this(indexAnalyzer, queryAnalyzer, EXACT_FIRST | PRESERVE_SEP, 256, -1, DEFAULT_MAX_EDITS, DEFAULT_TRANSPOSITIONS, - DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE); + DEFAULT_NON_FUZZY_PREFIX, DEFAULT_MIN_FUZZY_LENGTH, DEFAULT_UNICODE_AWARE, null, false, 0, SEP_LABEL, PAYLOAD_SEP, END_BYTE, HOLE_CHARACTER); } @@ -154,8 +154,8 @@ public final class XFuzzySuggester extends XAnalyzingSuggester { public XFuzzySuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, int options, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, int maxEdits, boolean transpositions, int nonFuzzyPrefix, int minFuzzyLength, boolean unicodeAware, FST> fst, boolean hasPayloads, int maxAnalyzedPathsForOneInput, - int sepLabel, int payloadSep, int endByte) { - super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte); + int sepLabel, int payloadSep, int endByte, int holeCharacter) { + super(indexAnalyzer, queryAnalyzer, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, true, fst, hasPayloads, maxAnalyzedPathsForOneInput, sepLabel, payloadSep, endByte, holeCharacter); if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); } diff --git a/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java b/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java index b13356df642..3978e9ffcec 100644 --- a/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java +++ b/src/main/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProvider.java @@ -74,7 +74,7 @@ public class 
AnalyzingCompletionLookupProvider extends CompletionLookupProvider int options = preserveSep ? XAnalyzingSuggester.PRESERVE_SEP : 0; // needs to fixed in the suggester first before it can be supported //options |= exactFirst ? XAnalyzingSuggester.EXACT_FIRST : 0; - prototype = new XAnalyzingSuggester(null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE); + prototype = new XAnalyzingSuggester(null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); } @Override @@ -231,19 +231,23 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider // first version did not include these three fields, so fall back to old default (before the analyzingsuggester // was updated in Lucene, so we cannot use the suggester defaults) - int sepLabel, payloadSep, endByte; - if (version == CODEC_VERSION_START) { - sepLabel = 0xFF; - payloadSep = '\u001f'; - endByte = 0x0; - } else { - sepLabel = input.readVInt(); - endByte = input.readVInt(); - payloadSep = input.readVInt(); + int sepLabel, payloadSep, endByte, holeCharacter; + switch (version) { + case CODEC_VERSION_START: + sepLabel = 0xFF; + payloadSep = '\u001f'; + endByte = 0x0; + holeCharacter = '\u001E'; + break; + default: + sepLabel = input.readVInt(); + endByte = input.readVInt(); + payloadSep = input.readVInt(); + holeCharacter = input.readVInt(); } AnalyzingSuggestHolder holder = new AnalyzingSuggestHolder(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, - hasPayloads, maxAnalyzedPathsForOneInput, fst, sepLabel, payloadSep, endByte); + hasPayloads, maxAnalyzedPathsForOneInput, fst, sepLabel, payloadSep, endByte, 
holeCharacter); lookupMap.put(entry.getValue(), holder); } return new LookupFactory() { @@ -262,13 +266,15 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(), analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, - analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte); + analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, + analyzingSuggestHolder.holeCharacter); } else { suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, - analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte); + analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, + analyzingSuggestHolder.holeCharacter); } return suggester; } @@ -316,13 +322,14 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider final int sepLabel; final int payloadSep; final int endByte; + final int holeCharacter; public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST> fst) { - this(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, 
hasPayloads, maxAnalyzedPathsForOneInput, fst, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE); + this(preserveSep, preservePositionIncrements, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, hasPayloads, maxAnalyzedPathsForOneInput, fst, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); } - public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST> fst, int sepLabel, int payloadSep, int endByte) { + public AnalyzingSuggestHolder(boolean preserveSep, boolean preservePositionIncrements, int maxSurfaceFormsPerAnalyzedForm, int maxGraphExpansions, boolean hasPayloads, int maxAnalyzedPathsForOneInput, FST> fst, int sepLabel, int payloadSep, int endByte, int holeCharacter) { this.preserveSep = preserveSep; this.preservePositionIncrements = preservePositionIncrements; this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm; @@ -333,6 +340,7 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider this.sepLabel = sepLabel; this.payloadSep = payloadSep; this.endByte = endByte; + this.holeCharacter = holeCharacter; } } diff --git a/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java b/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java index e866790c0cd..11312c4ac63 100644 --- a/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java +++ b/src/test/java/org/elasticsearch/search/suggest/completion/AnalyzingCompletionLookupProviderV1.java @@ -85,7 +85,7 @@ public class AnalyzingCompletionLookupProviderV1 extends CompletionLookupProvide // needs to fixed in the suggester first before it can be supported //options |= exactFirst ? 
XAnalyzingSuggester.EXACT_FIRST : 0; prototype = new XAnalyzingSuggester(null, null, options, maxSurfaceFormsPerAnalyzedForm, maxGraphExpansions, preservePositionIncrements, - null, false, 1, SEP_LABEL, PAYLOAD_SEP, END_BYTE); + null, false, 1, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); } @Override @@ -255,14 +255,14 @@ public class AnalyzingCompletionLookupProviderV1 extends CompletionLookupProvide suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), false, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, - analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE); + analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); } else { suggester = new XAnalyzingSuggester(mapper.indexAnalyzer(), mapper.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, - analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE); + analyzingSuggestHolder.maxAnalyzedPathsForOneInput, SEP_LABEL, PAYLOAD_SEP, END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); } return suggester; } diff --git a/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java b/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java index 8f0660a5f66..c9a13e01aba 100644 --- a/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java +++ b/src/test/java/org/elasticsearch/search/suggest/completion/CompletionPostingsFormatTest.java @@ -123,7 +123,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase { final int options = preserveSeparators ? 
AnalyzingSuggester.PRESERVE_SEP : 0; XAnalyzingSuggester reference = new XAnalyzingSuggester(new StandardAnalyzer(TEST_VERSION_CURRENT), new StandardAnalyzer( - TEST_VERSION_CURRENT), options, 256, -1, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE); + TEST_VERSION_CURRENT), options, 256, -1, preservePositionIncrements, null, false, 1, XAnalyzingSuggester.SEP_LABEL, XAnalyzingSuggester.PAYLOAD_SEP, XAnalyzingSuggester.END_BYTE, XAnalyzingSuggester.HOLE_CHARACTER); LineFileDocs docs = new LineFileDocs(getRandom()); int num = atLeast(150); final String[] titles = new String[num];