Rework of the expand issue: replaced the EdgeAnalyzer
parent a511543e77
commit 014e2ba9cb
@@ -79,28 +79,4 @@ public class StringUtil {
		return new String(bytes, StandardCharsets.UTF_8);
	}

	/**
	 * Checks whether the input string contains a token that starts with the given prefix.
	 *
	 * e.g. for a display text of "Body height":
	 * searching on "Bo" or "he" should find it
	 * searching on "ei" or "dy" should not find it
	 *
	 * @param theInput the input string
	 * @param thePrefix the prefix a token should start with
	 * @return true if any whitespace-separated token of the input string starts with the prefix, ignoring case
	 */
	public static boolean isStartsWithIgnoreCase(final String theInput, final String thePrefix) {

		if (theInput == null || thePrefix == null)
			return false;

		StringTokenizer tok = new StringTokenizer(theInput);
		while (tok.hasMoreTokens()) {
			if (org.apache.commons.lang3.StringUtils.startsWithIgnoreCase(tok.nextToken(), thePrefix))
				return true;
		}

		return false;
	}
}
@@ -53,14 +53,4 @@ public class StringUtilTest {
		assertEquals("a/a", StringUtil.chompCharacter("a/a////", '/'));
	}

	@Test
	public void testIsStartsWithIgnoreCase() {
		assertFalse(StringUtil.isStartsWithIgnoreCase(null, null));
		assertFalse(StringUtil.isStartsWithIgnoreCase(null, "hei"));
		assertFalse(StringUtil.isStartsWithIgnoreCase("Body height", null));
		assertTrue(StringUtil.isStartsWithIgnoreCase("Body height", "he"));
		assertTrue(StringUtil.isStartsWithIgnoreCase("Body height", "bo"));
		assertFalse(StringUtil.isStartsWithIgnoreCase("Body height", "ei"));
		assertFalse(StringUtil.isStartsWithIgnoreCase("Body height", "dy"));
	}
}
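A few further cases could sit alongside the test above. This is a hedged sketch rather than part of the commit (the method name is made up); the expected results follow from the implementation: matching is case-insensitive, a prefix may equal a whole token, and an empty prefix matches any token.

	@Test
	public void testIsStartsWithIgnoreCaseSupplementarySketch() {
		// Hedged supplement, not in the commit; expected results follow from applying
		// StringUtils.startsWithIgnoreCase to each whitespace-separated token.
		assertTrue(StringUtil.isStartsWithIgnoreCase("Body height", "BODY"));   // case-insensitive match
		assertTrue(StringUtil.isStartsWithIgnoreCase("Body height", "height")); // prefix equal to a whole token
		assertTrue(StringUtil.isStartsWithIgnoreCase("Body height", ""));       // empty prefix matches any token
	}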
@@ -74,6 +74,7 @@ public class TermConcept implements Serializable {
	@Fields({
		@Field(name = "myDisplay", index = org.hibernate.search.annotations.Index.YES, store = Store.YES, analyze = Analyze.YES, analyzer = @Analyzer(definition = "standardAnalyzer")),
		@Field(name = "myDisplayEdgeNGram", index = org.hibernate.search.annotations.Index.YES, store = Store.NO, analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteEdgeAnalyzer")),
		@Field(name = "myDisplayWordEdgeNGram", index = org.hibernate.search.annotations.Index.YES, store = Store.NO, analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteWordEdgeAnalyzer")),
		@Field(name = "myDisplayNGram", index = org.hibernate.search.annotations.Index.YES, store = Store.NO, analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteNGramAnalyzer")),
		@Field(name = "myDisplayPhonetic", index = org.hibernate.search.annotations.Index.YES, store = Store.NO, analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompletePhoneticAnalyzer"))
	})
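For orientation, here is a hedged reading of the five index fields declared above, expressed as comments. The pairing of field and analyzer is taken from the annotations; the stated purpose of each analyzer is inferred from its name and is not spelled out in the commit.

	// myDisplay               standardAnalyzer              - plain tokenized text for exact and phrase matches
	// myDisplayEdgeNGram      autocompleteEdgeAnalyzer      - prefixes of the display text taken as a whole
	// myDisplayWordEdgeNGram  autocompleteWordEdgeAnalyzer  - prefixes of each individual word (presumably the line added by this hunk)
	// myDisplayNGram          autocompleteNGramAnalyzer     - substrings anywhere in the display text
	// myDisplayPhonetic       autocompletePhoneticAnalyzer  - sound-alike matching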
@@ -48,6 +48,12 @@ public class LuceneSearchMappingFactory {
			.filter(EdgeNGramFilterFactory.class)
			.param("minGramSize", "3")
			.param("maxGramSize", "50")
			.analyzerDef("autocompleteWordEdgeAnalyzer", StandardTokenizerFactory.class)
			.filter(LowerCaseFilterFactory.class)
			.filter(StopFilterFactory.class)
			.filter(EdgeNGramFilterFactory.class)
			.param("minGramSize", "2")
			.param("maxGramSize", "20")
			.analyzerDef("autocompletePhoneticAnalyzer", StandardTokenizerFactory.class)
			.filter(StandardFilterFactory.class)
			.filter(StopFilterFactory.class)
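Because the hunk starts mid-chain, a small self-contained sketch may help show where the new autocompleteWordEdgeAnalyzer definition sits in the Hibernate Search 5 programmatic mapping API. The tokenizer and filter classes are the ones named in the hunk; the wrapper class and method are hypothetical, not the committed factory.

import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory;
import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
import org.hibernate.search.cfg.SearchMapping;

// Hedged sketch, not the committed factory class; only the analyzerDef chain mirrors the hunk.
public class AutocompleteWordEdgeMappingSketch {

	public static SearchMapping buildMapping() {
		SearchMapping mapping = new SearchMapping();
		mapping.analyzerDef("autocompleteWordEdgeAnalyzer", StandardTokenizerFactory.class)
				.filter(LowerCaseFilterFactory.class)  // lower-case each token
				.filter(StopFilterFactory.class)       // drop stop words
				.filter(EdgeNGramFilterFactory.class)  // emit 2..20 character prefixes per token
				.param("minGramSize", "2")
				.param("maxGramSize", "20");
		return mapping;
	}
}

With these settings, a display text such as "Body height" would be expected to index per-word prefixes like "bo", "bod", "body", "he", "hei", ..., "height", which is what lets searches on "Bo" or "he" find it while "ei" or "dy" do not.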
@@ -36,6 +36,14 @@ public class ElasticsearchMappingProvider implements ElasticsearchAnalysisDefini
				.param("min_gram", "3")
				.param("max_gram", "50");

		builder.analyzer("autocompleteWordEdgeAnalyzer")
				.withTokenizer("standard")
				.withTokenFilters("lowercase", "stop", "edgengram_3_50");
		builder.tokenFilter("edgengram_3_50")
				.type("edgeNGram")
				.param("min_gram", "2")
				.param("max_gram", "20");

		builder.analyzer("autocompletePhoneticAnalyzer")
				.withTokenizer("standard")
				.withTokenFilters("standard", "stop", "snowball_english");
@@ -1091,8 +1091,8 @@ public abstract class BaseTermReadSvcImpl implements ITermReadSvc {
					.phrase()
					.withSlop(2)
					.onField("myDisplay").boostedTo(4.0f)
					.andField("myDisplayEdgeNGram").boostedTo(2.0f)
					.andField("myDisplayNGram").boostedTo(1.0f)
					//.andField("myDisplayEdgeNGram").boostedTo(2.0f)
					.andField("myDisplayWordEdgeNGram").boostedTo(1.0f)
					// .andField("myDisplayPhonetic").boostedTo(0.5f)
					.sentence(nextFilter.getValue().toLowerCase()).createQuery();
				bool.must(textQuery);
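To make the query shape easier to read on its own, here is a minimal hedged sketch of the same kind of phrase query built with the Hibernate Search 5 query DSL. The field names and boosts are taken from the hunk; the standalone class, the FullTextEntityManager entry point, and the TermConcept package are assumptions for illustration, not the committed service code.

import org.apache.lucene.search.Query;
import org.hibernate.search.jpa.FullTextEntityManager;
import org.hibernate.search.query.dsl.QueryBuilder;

import ca.uhn.fhir.jpa.entity.TermConcept; // package assumed

// Hedged sketch, not the committed service code.
public class DisplayPhraseQuerySketch {

	public static Query build(FullTextEntityManager theEntityManager, String theDisplayFilter) {
		QueryBuilder qb = theEntityManager.getSearchFactory()
				.buildQueryBuilder()
				.forEntity(TermConcept.class)
				.get();
		return qb.phrase()
				.withSlop(2)                                        // tolerate small gaps between words
				.onField("myDisplay").boostedTo(4.0f)               // exact display text ranks highest
				.andField("myDisplayNGram").boostedTo(1.0f)         // substring matches
				.andField("myDisplayWordEdgeNGram").boostedTo(1.0f) // per-word prefix matches (the new field)
				.sentence(theDisplayFilter.toLowerCase())
				.createQuery();
	}
}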