diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/SearchPattern.java b/jetty-util/src/main/java/org/eclipse/jetty/util/SearchPattern.java index 58b255f34cb..1455e617c0e 100644 --- a/jetty-util/src/main/java/org/eclipse/jetty/util/SearchPattern.java +++ b/jetty-util/src/main/java/org/eclipse/jetty/util/SearchPattern.java @@ -18,34 +18,55 @@ package org.eclipse.jetty.util; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + + +/** + * SearchPattern + * + * Fast search for patterns within strings and arrays of bytes. + * Uses an implementation of the Boyer–Moore–Horspool algorithm + * with a 256-character alphabet. + * + * The algorithm has an average-case complexity of O(n) + * on random text and O(nm) in the worst case, + * where: + * m = pattern length + * n = length of data to search + */ public class SearchPattern { static final int alphabetSize = 256; - int[] table; - byte[] pattern; - - public int[] getTable(){ return this.table; } + private int[] table; + private byte[] pattern; - /** - * @param pattern The pattern to search for. - * @return A Pattern instance for the search pattern - */ - static SearchPattern compile(byte[] pattern) + + public static SearchPattern compile(byte[] pattern) { - //Create new SearchPattern instance - SearchPattern sp = new SearchPattern(); + return new SearchPattern(Arrays.copyOf(pattern, pattern.length)); + } + + + public static SearchPattern compile(String pattern) + { + return new SearchPattern(pattern.getBytes(StandardCharsets.UTF_8)); + } + + + private SearchPattern(byte[] pattern) + { + this.pattern = pattern; - //Copy in the Pattern - sp.pattern = pattern.clone(); + if(pattern.length == 0) + throw new IllegalArgumentException("Empty Pattern"); //Build up the pre-processed table for this pattern. 
- sp.table = new int[alphabetSize]; - for(int i = 0; i data.length) + throw new IllegalArgumentException("(offset+length) out of bounds of data[]"); + } + } diff --git a/jetty-util/src/test/java/org/eclipse/jetty/util/SearchPatternTest.java b/jetty-util/src/test/java/org/eclipse/jetty/util/SearchPatternTest.java index 2add9948e52..90554df8536 100644 --- a/jetty-util/src/test/java/org/eclipse/jetty/util/SearchPatternTest.java +++ b/jetty-util/src/test/java/org/eclipse/jetty/util/SearchPatternTest.java @@ -20,45 +20,46 @@ package org.eclipse.jetty.util; import static org.junit.Assert.*; +import java.nio.charset.StandardCharsets; + import org.junit.Assert; import org.junit.Test; public class SearchPatternTest { + @Test public void testBasicSearch() { - String p1 = "truth"; - String p2 = "evident"; - String p3 = "we"; - String d = "we hold these truths to be self evident"; - + byte[] p1 = new String("truth").getBytes(StandardCharsets.US_ASCII); + byte[] p2 = new String("evident").getBytes(StandardCharsets.US_ASCII); + byte[] p3 = new String("we").getBytes(StandardCharsets.US_ASCII); + byte[] d = new String("we hold these truths to be self evident").getBytes(StandardCharsets.US_ASCII); // Testing Compiled Pattern p1 "truth" - SearchPattern sp1 = SearchPattern.compile(p1.getBytes()); - Assert.assertEquals(14,sp1.match(d.getBytes(), 0, d.length())); - Assert.assertEquals(14,sp1.match(d.getBytes(),14,p1.length())); - Assert.assertEquals(14,sp1.match(d.getBytes(),14,p1.length()+1)); - Assert.assertEquals(-1,sp1.match(d.getBytes(),14,p1.length()-1)); - Assert.assertEquals(-1,sp1.match(d.getBytes(),15,d.length())); + SearchPattern sp1 = SearchPattern.compile(p1); + Assert.assertEquals(14,sp1.match(d, 0, d.length)); + Assert.assertEquals(14,sp1.match(d,14,p1.length)); + Assert.assertEquals(14,sp1.match(d,14,p1.length+1)); + Assert.assertEquals(-1,sp1.match(d,14,p1.length-1)); + Assert.assertEquals(-1,sp1.match(d,15,d.length-15)); // Testing Compiled Pattern p2 "evident" - 
SearchPattern sp2 = SearchPattern.compile(p2.getBytes()); - Assert.assertEquals(32,sp2.match(d.getBytes(), 0, d.length())); - Assert.assertEquals(32,sp2.match(d.getBytes(),32,p2.length())); - Assert.assertEquals(32,sp2.match(d.getBytes(),32,p2.length()+1)); - Assert.assertEquals(-1,sp2.match(d.getBytes(),32,p2.length()-1)); - Assert.assertEquals(-1,sp2.match(d.getBytes(),33,d.length())); - + SearchPattern sp2 = SearchPattern.compile(p2); + Assert.assertEquals(32,sp2.match(d, 0, d.length)); + Assert.assertEquals(32,sp2.match(d,32,p2.length)); + Assert.assertEquals(32,sp2.match(d,32,p2.length)); + Assert.assertEquals(-1,sp2.match(d,32,p2.length-1)); + Assert.assertEquals(-1,sp2.match(d,33,d.length-33)); // Testing Compiled Pattern p3 "evident" - SearchPattern sp3 = SearchPattern.compile(p3.getBytes()); - Assert.assertEquals( 0,sp3.match(d.getBytes(), 0, d.length())); - Assert.assertEquals( 0,sp3.match(d.getBytes(), 0, p3.length())); - Assert.assertEquals( 0,sp3.match(d.getBytes(), 0, p3.length()+1)); - Assert.assertEquals(-1,sp3.match(d.getBytes(), 0, p3.length()-1)); - Assert.assertEquals(-1,sp3.match(d.getBytes(), 1, d.length())); + SearchPattern sp3 = SearchPattern.compile(p3); + Assert.assertEquals( 0,sp3.match(d, 0, d.length)); + Assert.assertEquals( 0,sp3.match(d, 0, p3.length)); + Assert.assertEquals( 0,sp3.match(d, 0, p3.length+1)); + Assert.assertEquals(-1,sp3.match(d, 0, p3.length-1)); + Assert.assertEquals(-1,sp3.match(d, 1, d.length-1)); } @@ -66,17 +67,106 @@ public class SearchPatternTest @Test public void testDoubleMatch() { - String p = "violent"; - String d = "These violent delights have violent ends."; - - // Testing Compiled Pattern p1 "truth" - SearchPattern sp = SearchPattern.compile(p.getBytes()); - Assert.assertEquals( 6,sp.match(d.getBytes(), 0, d.length())); - Assert.assertEquals(-1,sp.match(d.getBytes(), 6, p.length()-1)); - Assert.assertEquals(28,sp.match(d.getBytes(), 7, d.length())); - Assert.assertEquals(28,sp.match(d.getBytes(), 28, 
d.length())); - Assert.assertEquals(-1,sp.match(d.getBytes(), 29, d.length())); - + byte[] p = new String("violent").getBytes(StandardCharsets.US_ASCII); + byte[] d = new String("These violent delights have violent ends.").getBytes(StandardCharsets.US_ASCII); + SearchPattern sp = SearchPattern.compile(p); + Assert.assertEquals( 6,sp.match(d, 0, d.length)); + Assert.assertEquals(-1,sp.match(d, 6, p.length-1)); + Assert.assertEquals(28,sp.match(d, 7, d.length-7)); + Assert.assertEquals(28,sp.match(d, 28, d.length-28)); + Assert.assertEquals(-1,sp.match(d, 29, d.length-29)); } + + @Test + public void testAlmostMatch() + { + byte[] p = new String("violent").getBytes(StandardCharsets.US_ASCII); + byte[] d = new String("vio lent violen v iolent violin vioviolenlent viiolent").getBytes(StandardCharsets.US_ASCII); + SearchPattern sp = SearchPattern.compile(p); + Assert.assertEquals(-1,sp.match(d, 0, d.length)); + } + + + @Test + public void testOddSizedPatterns() + { + // Test Large Pattern + byte[] p = new String("pneumonoultramicroscopicsilicovolcanoconiosis").getBytes(StandardCharsets.US_ASCII); + byte[] d = new String("pneumon").getBytes(StandardCharsets.US_ASCII); + SearchPattern sp = SearchPattern.compile(p); + Assert.assertEquals(-1,sp.match(d, 0, d.length)); + + // Test Single Character Pattern + p = new String("s").getBytes(StandardCharsets.US_ASCII); + d = new String("the cake is a lie").getBytes(StandardCharsets.US_ASCII); + sp = SearchPattern.compile(p); + Assert.assertEquals(10,sp.match(d, 0, d.length)); + } + + + @Test + public void testEndsWith() + { + byte[] p = new String("pneumonoultramicroscopicsilicovolcanoconiosis").getBytes(StandardCharsets.US_ASCII); + byte[] d = new String("pneumonoultrami").getBytes(StandardCharsets.US_ASCII); + SearchPattern sp = SearchPattern.compile(p); + Assert.assertEquals(15,sp.endsWith(d,0,d.length)); + + p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII); + d = new 
String("abcdefghijklmnopqrstuvwxyzabcdefghijklmno").getBytes(StandardCharsets.US_ASCII); + sp = SearchPattern.compile(p); + Assert.assertEquals(0,sp.match(d,0,d.length)); + Assert.assertEquals(-1,sp.match(d,1,d.length-1)); + Assert.assertEquals(15,sp.endsWith(d,0,d.length)); + + p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII); + d = new String("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII); + sp = SearchPattern.compile(p); + Assert.assertEquals(0,sp.match(d,0,d.length)); + Assert.assertEquals(26,sp.match(d,1,d.length-1)); + Assert.assertEquals(26,sp.endsWith(d,0,d.length)); + + //test no match + p = new String("hello world").getBytes(StandardCharsets.US_ASCII); + d = new String("there is definitely no match in here").getBytes(StandardCharsets.US_ASCII); + sp = SearchPattern.compile(p); + Assert.assertEquals(0,sp.endsWith(d,0,d.length)); + } + + + @Test + public void testStartsWith() + { + byte[] p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII); + byte[] d = new String("ijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII); + SearchPattern sp = SearchPattern.compile(p); + Assert.assertEquals(18,sp.match(d,0,d.length)); + Assert.assertEquals(-1,sp.match(d,19,d.length-19)); + Assert.assertEquals(18,sp.startsWith(d,0,d.length,8)); + + p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII); + d = new String("ijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII); + sp = SearchPattern.compile(p); + Assert.assertEquals(19,sp.match(d,0,d.length)); + Assert.assertEquals(-1,sp.match(d,20,d.length-20)); + Assert.assertEquals(18,sp.startsWith(d,0,d.length,8)); + + p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII); + d = new String("abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII); + sp = SearchPattern.compile(p); 
+ Assert.assertEquals(26,sp.startsWith(d,0,d.length,0)); + + //test no match + p = new String("hello world").getBytes(StandardCharsets.US_ASCII); + d = new String("there is definitely no match in here").getBytes(StandardCharsets.US_ASCII); + sp = SearchPattern.compile(p); + Assert.assertEquals(0,sp.startsWith(d,0,d.length,0)); + + //test large pattern small buffer + p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII); + d = new String("mnopqrs").getBytes(StandardCharsets.US_ASCII); + sp = SearchPattern.compile(p); + Assert.assertEquals(7,sp.startsWith(d,0,d.length,12)); + } }