[BAEL-3480] - Java Fast pattern matching using trie and suffix tree
This commit is contained in:
parent
43a19207c8
commit
57470cdc1d
|
@ -14,16 +14,12 @@ public class SuffixTree {
|
|||
private Node root;
|
||||
private String fullText;
|
||||
|
||||
public SuffixTree() {
|
||||
public SuffixTree(String text) {
|
||||
root = new Node("", POSITION_UNDEFINED);
|
||||
fullText = "";
|
||||
}
|
||||
|
||||
public void addText(String text) {
|
||||
for (int i = 0; i < text.length(); i++) {
|
||||
addSuffix(text.substring(i) + WORD_TERMINATION, i);
|
||||
}
|
||||
fullText += text;
|
||||
fullText = text;
|
||||
}
|
||||
|
||||
public List<String> searchText(String pattern) {
|
||||
|
@ -151,8 +147,9 @@ public class SuffixTree {
|
|||
int compareLength = Math.min(nodeText.length(), pattern.length());
|
||||
for (int j = 1; j < compareLength; j++) {
|
||||
if (pattern.charAt(j) != nodeText.charAt(j)) {
|
||||
if (isAllowPartialMatch)
|
||||
if (isAllowPartialMatch) {
|
||||
nodes.add(currentNode);
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
}
|
||||
|
@ -160,11 +157,11 @@ public class SuffixTree {
|
|||
nodes.add(currentNode);
|
||||
if (pattern.length() > compareLength) {
|
||||
List<Node> nodes2 = getAllNodesInTraversePath(pattern.substring(compareLength), currentNode, isAllowPartialMatch);
|
||||
if (nodes2.size() == 0 && !isAllowPartialMatch) {
|
||||
nodes.add(null);
|
||||
return nodes;
|
||||
}
|
||||
if (nodes2.size() > 0) {
|
||||
nodes.addAll(nodes2);
|
||||
} else if (!isAllowPartialMatch) {
|
||||
nodes.add(null);
|
||||
}
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
|
|
|
@ -16,8 +16,7 @@ public class SuffixTreeUnitTest {
|
|||
|
||||
@BeforeClass
|
||||
public static void setUp() {
|
||||
suffixTree = new SuffixTree();
|
||||
suffixTree.addText("bananabanana");
|
||||
suffixTree = new SuffixTree("havanabanana");
|
||||
printTree();
|
||||
}
|
||||
|
||||
|
@ -26,7 +25,7 @@ public class SuffixTreeUnitTest {
|
|||
List<String> matches = suffixTree.searchText("a");
|
||||
matches.stream()
|
||||
.forEach(m -> LOGGER.info(m));
|
||||
Assert.assertArrayEquals(new String[] { "b[a]nanabanana", "ban[a]nabanana", "banan[a]banana", "bananab[a]nana", "bananaban[a]na", "bananabanan[a]" }, matches.toArray());
|
||||
Assert.assertArrayEquals(new String[] { "h[a]vanabanana", "hav[a]nabanana", "havan[a]banana", "havanab[a]nana", "havanaban[a]na", "havanabanan[a]" }, matches.toArray());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -34,7 +33,7 @@ public class SuffixTreeUnitTest {
|
|||
List<String> matches = suffixTree.searchText("nab");
|
||||
matches.stream()
|
||||
.forEach(m -> LOGGER.info(m));
|
||||
Assert.assertArrayEquals(new String[] { "bana[nab]anana" }, matches.toArray());
|
||||
Assert.assertArrayEquals(new String[] { "hava[nab]anana" }, matches.toArray());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -47,10 +46,10 @@ public class SuffixTreeUnitTest {
|
|||
|
||||
@Test
|
||||
public void givenSuffixTree_whenSearchingForBanana_thenReturn2Matches() {
|
||||
List<String> matches = suffixTree.searchText("banana");
|
||||
List<String> matches = suffixTree.searchText("ana");
|
||||
matches.stream()
|
||||
.forEach(m -> LOGGER.info(m));
|
||||
Assert.assertArrayEquals(new String[] { "[banana]banana", "banana[banana]" }, matches.toArray());
|
||||
Assert.assertArrayEquals(new String[] { "hav[ana]banana", "havanab[ana]na", "havanaban[ana]" }, matches.toArray());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -58,7 +57,7 @@ public class SuffixTreeUnitTest {
|
|||
List<String> matches = suffixTree.searchText("na");
|
||||
matches.stream()
|
||||
.forEach(m -> LOGGER.info(m));
|
||||
Assert.assertArrayEquals(new String[] { "ba[na]nabanana", "bana[na]banana", "bananaba[na]na", "bananabana[na]" }, matches.toArray());
|
||||
Assert.assertArrayEquals(new String[] { "hava[na]banana", "havanaba[na]na", "havanabana[na]" }, matches.toArray());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue