Rework of the expand issue: replaced the EdgeAnalyzer

This commit is contained in:
Frank Tao 2020-11-26 20:37:51 -05:00
parent a511543e77
commit 014e2ba9cb
6 changed files with 17 additions and 36 deletions

View File

@ -79,28 +79,4 @@ public class StringUtil {
return new String(bytes, StandardCharsets.UTF_8);
}
/**
 * Checks whether any whitespace-separated token of the input string starts with
 * the given prefix, ignoring case.
 *
 * <p>e.g. for a display text of "Body height":
 * <ul>
 * <li>searching on "Bo" or "he" should find it</li>
 * <li>searching on "ei" or "dy" should not find it</li>
 * </ul>
 *
 * <p>An empty prefix matches any input that contains at least one token.
 *
 * @param theInput the input string; may be {@code null}
 * @param thePrefix the prefix expected at the start of a token; may be {@code null}
 * @return {@code true} if at least one token of the input starts with the prefix
 *         (case-insensitively); {@code false} otherwise, including when either
 *         argument is {@code null} or the input contains no tokens
 */
public static boolean isStartsWithIgnoreCase(final String theInput, final String thePrefix) {
	if (theInput == null || thePrefix == null) {
		return false;
	}

	final int prefixLength = thePrefix.length();
	// No delimiter argument: StringTokenizer splits on its default delimiter set
	// (space, tab, newline, carriage return, form feed).
	final StringTokenizer tokens = new StringTokenizer(theInput);
	while (tokens.hasMoreTokens()) {
		// regionMatches(ignoreCase = true, ...) yields false when the token is
		// shorter than the prefix, so no explicit length check is needed.
		if (tokens.nextToken().regionMatches(true, 0, thePrefix, 0, prefixLength)) {
			return true;
		}
	}
	return false;
}
}

View File

@ -53,14 +53,4 @@ public class StringUtilTest {
assertEquals("a/a", StringUtil.chompCharacter("a/a////", '/'));
}
@Test
public void testIsStartsWithIgnoreCase() {
	final String displayText = "Body height";

	// A null input and/or a null prefix is never a match
	assertFalse(StringUtil.isStartsWithIgnoreCase(null, null));
	assertFalse(StringUtil.isStartsWithIgnoreCase(null, "hei"));
	assertFalse(StringUtil.isStartsWithIgnoreCase(displayText, null));

	// Prefixes found at the start of a word match, regardless of case
	for (String matchingPrefix : new String[] {"he", "bo"}) {
		assertTrue(StringUtil.isStartsWithIgnoreCase(displayText, matchingPrefix));
	}

	// Fragments found only in the middle or at the end of a word do not match
	for (String nonMatchingPrefix : new String[] {"ei", "dy"}) {
		assertFalse(StringUtil.isStartsWithIgnoreCase(displayText, nonMatchingPrefix));
	}
}
}

View File

@ -74,6 +74,7 @@ public class TermConcept implements Serializable {
@Fields({
@Field(name = "myDisplay", index = org.hibernate.search.annotations.Index.YES, store = Store.YES, analyze = Analyze.YES, analyzer = @Analyzer(definition = "standardAnalyzer")),
@Field(name = "myDisplayEdgeNGram", index = org.hibernate.search.annotations.Index.YES, store = Store.NO, analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteEdgeAnalyzer")),
@Field(name = "myDisplayWordEdgeNGram", index = org.hibernate.search.annotations.Index.YES, store = Store.NO, analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteWordEdgeAnalyzer")),
@Field(name = "myDisplayNGram", index = org.hibernate.search.annotations.Index.YES, store = Store.NO, analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompleteNGramAnalyzer")),
@Field(name = "myDisplayPhonetic", index = org.hibernate.search.annotations.Index.YES, store = Store.NO, analyze = Analyze.YES, analyzer = @Analyzer(definition = "autocompletePhoneticAnalyzer"))
})

View File

@ -48,6 +48,12 @@ public class LuceneSearchMappingFactory {
.filter(EdgeNGramFilterFactory.class)
.param("minGramSize", "3")
.param("maxGramSize", "50")
.analyzerDef("autocompleteWordEdgeAnalyzer", StandardTokenizerFactory.class)
.filter(LowerCaseFilterFactory.class)
.filter(StopFilterFactory.class)
.filter(EdgeNGramFilterFactory.class)
.param("minGramSize", "2")
.param("maxGramSize", "20")
.analyzerDef("autocompletePhoneticAnalyzer", StandardTokenizerFactory.class)
.filter(StandardFilterFactory.class)
.filter(StopFilterFactory.class)

View File

@ -36,6 +36,14 @@ public class ElasticsearchMappingProvider implements ElasticsearchAnalysisDefini
.param("min_gram", "3")
.param("max_gram", "50");
// Word-level edge n-gram analyzer: standard tokenizer, then lowercase and stop
// filters, then edge n-grams produced by the "wordedgengram_2_20" filter below.
builder.analyzer("autocompleteWordEdgeAnalyzer")
	.withTokenizer("standard")
	.withTokenFilters("lowercase", "stop", "wordedgengram_2_20");

// Registered under its own name instead of "edgengram_3_50": that name denotes
// 3..50 grams, whereas this filter emits 2..20 grams, and registering a second
// filter under the same name with different parameters would conflict.
builder.tokenFilter("wordedgengram_2_20")
	.type("edgeNGram")
	.param("min_gram", "2")
	.param("max_gram", "20");
builder.analyzer("autocompletePhoneticAnalyzer")
.withTokenizer("standard")
.withTokenFilters("standard", "stop", "snowball_english");

View File

@ -1091,8 +1091,8 @@ public abstract class BaseTermReadSvcImpl implements ITermReadSvc {
.phrase()
.withSlop(2)
.onField("myDisplay").boostedTo(4.0f)
.andField("myDisplayEdgeNGram").boostedTo(2.0f)
.andField("myDisplayNGram").boostedTo(1.0f)
//.andField("myDisplayEdgeNGram").boostedTo(2.0f)
.andField("myDisplayWordEdgeNGram").boostedTo(1.0f)
// .andField("myDisplayPhonetic").boostedTo(0.5f)
.sentence(nextFilter.getValue().toLowerCase()).createQuery();
bool.must(textQuery);