Completed SearchPattern class and tests
Signed-off-by: Lachlan Roberts <lachlan@webtide.com>
This commit is contained in:
parent
f68cf85e94
commit
809aee129e
|
@ -18,34 +18,55 @@
|
|||
|
||||
package org.eclipse.jetty.util;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
|
||||
|
||||
/**
|
||||
* SearchPattern
|
||||
*
|
||||
* Fast search for patterns within strings and arrays of bytes.
|
||||
* Uses an implementation of the Boyer–Moore–Horspool algorithm
|
||||
* with a 256 character alphabet.
|
||||
*
|
||||
* The algorithm has an average-case complexity of O(n)
|
||||
* on random text and O(nm) in the worst case,
|
||||
* where:
|
||||
* m = pattern length
|
||||
* n = length of data to search
|
||||
*/
|
||||
public class SearchPattern
|
||||
{
|
||||
static final int alphabetSize = 256;
|
||||
int[] table;
|
||||
byte[] pattern;
|
||||
private int[] table;
|
||||
private byte[] pattern;
|
||||
|
||||
/**
 * @return the pre-processed skip table built for this pattern
 * (the internal array itself is returned, not a copy).
 */
public int[] getTable()
{
    return table;
}
|
||||
|
||||
/**
 * Compiles a search pattern from raw bytes.
 * @param pattern The pattern to search for. The array is copied, so later
 * changes to it do not affect the compiled pattern.
 * @return A SearchPattern instance for the search pattern
 * @throws IllegalArgumentException if the pattern is empty
 */
public static SearchPattern compile(byte[] pattern)
{
    return new SearchPattern(Arrays.copyOf(pattern, pattern.length));
}

/**
 * Compiles a search pattern from the UTF-8 encoding of a string.
 * @param pattern The pattern to search for.
 * @return A SearchPattern instance for the search pattern
 * @throws IllegalArgumentException if the pattern is empty
 */
public static SearchPattern compile(String pattern)
{
    return new SearchPattern(pattern.getBytes(StandardCharsets.UTF_8));
}

/**
 * Stores the pattern and builds its skip table.
 * @param pattern The pattern bytes; the array is kept as-is (callers pass a private copy).
 */
private SearchPattern(byte[] pattern)
{
    if (pattern.length == 0)
        throw new IllegalArgumentException("Empty Pattern");

    this.pattern = pattern;

    //Build up the pre-processed table for this pattern.
    //Every byte value defaults to a skip of the whole pattern length.
    table = new int[alphabetSize];
    Arrays.fill(table, pattern.length);

    //Each byte except the last gets its distance from the end of the pattern.
    //Bytes are signed in Java, so mask to 0..255 before indexing; without the
    //mask any pattern byte >= 0x80 (e.g. non-ASCII UTF-8) is a negative index.
    for (int i = 0; i < pattern.length - 1; ++i)
        table[0xFF & pattern[i]] = pattern.length - 1 - i;
}
|
||||
|
||||
|
||||
|
@ -59,8 +80,10 @@ public class SearchPattern
|
|||
*/
|
||||
public int match(byte[] data, int offset, int length)
|
||||
{
|
||||
validate(data, offset, length);
|
||||
|
||||
int skip = offset;
|
||||
while((skip <= data.length - pattern.length) && (skip+pattern.length <= offset+length))
|
||||
while(skip <= offset+length - pattern.length)
|
||||
{
|
||||
for(int i = pattern.length-1; data[skip+i] == pattern[i]; i--)
|
||||
if(i==0) return skip;
|
||||
|
@ -77,25 +100,68 @@ public class SearchPattern
|
|||
* but the pattern will always be {@link StandardCharsets#US_ASCII} encoded.
|
||||
* @param offset The offset within the data to start the search
|
||||
* @param length The length of the data to search
|
||||
* @return the length of the partial pattern matched or -1 for no match.
|
||||
* @return the length of the partial pattern matched, or 0 for no match.
|
||||
*/
|
||||
public int endsPartiallyWith(byte data, int offset, int length)
|
||||
public int endsWith(byte[] data, int offset, int length)
|
||||
{
|
||||
return -1;
|
||||
validate(data, offset, length);
|
||||
|
||||
int skip = (pattern.length <= length) ? (offset+length-pattern.length) : offset;
|
||||
while(skip < offset+length)
|
||||
{
|
||||
for(int i = (offset+length-1)-skip; data[skip+i] == pattern[i]; --i)
|
||||
if(i==0) return(offset+length - skip);
|
||||
|
||||
if(skip + pattern.length - 1 < data.length)
|
||||
skip += table[data[skip + pattern.length - 1]];
|
||||
else
|
||||
skip++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* Search for a partial match of the pattern at the start of the data.
|
||||
* Search for a possibly partial match of the pattern at the start of the data.
|
||||
* @param data The data in which to search for. The data may be arbitrary binary data,
|
||||
* but the pattern will always be {@link StandardCharsets#US_ASCII} encoded.
|
||||
* @param offset The offset within the data to start the search
|
||||
* @param length The length of the data to search
|
||||
* @param matched The length of the partial pattern already matched
|
||||
* @return the length of the partial pattern matched or -1 for no match.
|
||||
* @return the length of the partial pattern matched and 0 for no match.
|
||||
*/
|
||||
public int startsPartialyWith(byte[] data, int offset, int length, int matched)
|
||||
public int startsWith(byte[] data, int offset, int length, int matched)
|
||||
{
|
||||
return -1;
|
||||
validate(data, offset, length);
|
||||
|
||||
int matchedCount = 0;
|
||||
|
||||
for(int i=0; i<pattern.length-matched && i < offset+length; i++)
|
||||
{
|
||||
if(data[i] == pattern[i+matched])
|
||||
matchedCount++;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
return matchedCount;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs legality checks for standard arguments input into SearchPattern methods.
|
||||
* @param data The data in which to search for. The data may be arbitrary binary data,
|
||||
* but the pattern will always be {@link StandardCharsets#US_ASCII} encoded.
|
||||
* @param offset The offset within the data to start the search
|
||||
* @param length The length of the data to search
|
||||
*/
|
||||
private void validate(byte[] data, int offset, int length)
|
||||
{
|
||||
if (offset < 0)
|
||||
throw new IllegalArgumentException("offset was negative");
|
||||
else if (length < 0)
|
||||
throw new IllegalArgumentException("length was negative");
|
||||
else if (offset + length > data.length)
|
||||
throw new IllegalArgumentException("(offset+length) out of bounds of data[]");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -20,45 +20,46 @@ package org.eclipse.jetty.util;
|
|||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class SearchPatternTest
|
||||
{
|
||||
|
||||
|
||||
@Test
|
||||
public void testBasicSearch()
|
||||
{
|
||||
String p1 = "truth";
|
||||
String p2 = "evident";
|
||||
String p3 = "we";
|
||||
String d = "we hold these truths to be self evident";
|
||||
|
||||
byte[] p1 = new String("truth").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] p2 = new String("evident").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] p3 = new String("we").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("we hold these truths to be self evident").getBytes(StandardCharsets.US_ASCII);
|
||||
|
||||
// Testing Compiled Pattern p1 "truth"
|
||||
SearchPattern sp1 = SearchPattern.compile(p1.getBytes());
|
||||
Assert.assertEquals(14,sp1.match(d.getBytes(), 0, d.length()));
|
||||
Assert.assertEquals(14,sp1.match(d.getBytes(),14,p1.length()));
|
||||
Assert.assertEquals(14,sp1.match(d.getBytes(),14,p1.length()+1));
|
||||
Assert.assertEquals(-1,sp1.match(d.getBytes(),14,p1.length()-1));
|
||||
Assert.assertEquals(-1,sp1.match(d.getBytes(),15,d.length()));
|
||||
SearchPattern sp1 = SearchPattern.compile(p1);
|
||||
Assert.assertEquals(14,sp1.match(d, 0, d.length));
|
||||
Assert.assertEquals(14,sp1.match(d,14,p1.length));
|
||||
Assert.assertEquals(14,sp1.match(d,14,p1.length+1));
|
||||
Assert.assertEquals(-1,sp1.match(d,14,p1.length-1));
|
||||
Assert.assertEquals(-1,sp1.match(d,15,d.length-15));
|
||||
|
||||
// Testing Compiled Pattern p2 "evident"
|
||||
SearchPattern sp2 = SearchPattern.compile(p2.getBytes());
|
||||
Assert.assertEquals(32,sp2.match(d.getBytes(), 0, d.length()));
|
||||
Assert.assertEquals(32,sp2.match(d.getBytes(),32,p2.length()));
|
||||
Assert.assertEquals(32,sp2.match(d.getBytes(),32,p2.length()+1));
|
||||
Assert.assertEquals(-1,sp2.match(d.getBytes(),32,p2.length()-1));
|
||||
Assert.assertEquals(-1,sp2.match(d.getBytes(),33,d.length()));
|
||||
|
||||
SearchPattern sp2 = SearchPattern.compile(p2);
|
||||
Assert.assertEquals(32,sp2.match(d, 0, d.length));
|
||||
Assert.assertEquals(32,sp2.match(d,32,p2.length));
|
||||
Assert.assertEquals(32,sp2.match(d,32,p2.length));
|
||||
Assert.assertEquals(-1,sp2.match(d,32,p2.length-1));
|
||||
Assert.assertEquals(-1,sp2.match(d,33,d.length-33));
|
||||
|
||||
// Testing Compiled Pattern p3 "evident"
|
||||
SearchPattern sp3 = SearchPattern.compile(p3.getBytes());
|
||||
Assert.assertEquals( 0,sp3.match(d.getBytes(), 0, d.length()));
|
||||
Assert.assertEquals( 0,sp3.match(d.getBytes(), 0, p3.length()));
|
||||
Assert.assertEquals( 0,sp3.match(d.getBytes(), 0, p3.length()+1));
|
||||
Assert.assertEquals(-1,sp3.match(d.getBytes(), 0, p3.length()-1));
|
||||
Assert.assertEquals(-1,sp3.match(d.getBytes(), 1, d.length()));
|
||||
SearchPattern sp3 = SearchPattern.compile(p3);
|
||||
Assert.assertEquals( 0,sp3.match(d, 0, d.length));
|
||||
Assert.assertEquals( 0,sp3.match(d, 0, p3.length));
|
||||
Assert.assertEquals( 0,sp3.match(d, 0, p3.length+1));
|
||||
Assert.assertEquals(-1,sp3.match(d, 0, p3.length-1));
|
||||
Assert.assertEquals(-1,sp3.match(d, 1, d.length-1));
|
||||
|
||||
}
|
||||
|
||||
|
@ -66,17 +67,106 @@ public class SearchPatternTest
|
|||
@Test
|
||||
public void testDoubleMatch()
|
||||
{
|
||||
String p = "violent";
|
||||
String d = "These violent delights have violent ends.";
|
||||
|
||||
// Testing Compiled Pattern p1 "truth"
|
||||
SearchPattern sp = SearchPattern.compile(p.getBytes());
|
||||
Assert.assertEquals( 6,sp.match(d.getBytes(), 0, d.length()));
|
||||
Assert.assertEquals(-1,sp.match(d.getBytes(), 6, p.length()-1));
|
||||
Assert.assertEquals(28,sp.match(d.getBytes(), 7, d.length()));
|
||||
Assert.assertEquals(28,sp.match(d.getBytes(), 28, d.length()));
|
||||
Assert.assertEquals(-1,sp.match(d.getBytes(), 29, d.length()));
|
||||
|
||||
byte[] p = new String("violent").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("These violent delights have violent ends.").getBytes(StandardCharsets.US_ASCII);
|
||||
SearchPattern sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals( 6,sp.match(d, 0, d.length));
|
||||
Assert.assertEquals(-1,sp.match(d, 6, p.length-1));
|
||||
Assert.assertEquals(28,sp.match(d, 7, d.length-7));
|
||||
Assert.assertEquals(28,sp.match(d, 28, d.length-28));
|
||||
Assert.assertEquals(-1,sp.match(d, 29, d.length-29));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testAlmostMatch()
|
||||
{
|
||||
byte[] p = new String("violent").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("vio lent violen v iolent violin vioviolenlent viiolent").getBytes(StandardCharsets.US_ASCII);
|
||||
SearchPattern sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(-1,sp.match(d, 0, d.length));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testOddSizedPatterns()
|
||||
{
|
||||
// Test Large Pattern
|
||||
byte[] p = new String("pneumonoultramicroscopicsilicovolcanoconiosis").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("pneumon").getBytes(StandardCharsets.US_ASCII);
|
||||
SearchPattern sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(-1,sp.match(d, 0, d.length));
|
||||
|
||||
// Test Single Character Pattern
|
||||
p = new String("s").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("the cake is a lie").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(10,sp.match(d, 0, d.length));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testEndsWith()
|
||||
{
|
||||
byte[] p = new String("pneumonoultramicroscopicsilicovolcanoconiosis").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("pneumonoultrami").getBytes(StandardCharsets.US_ASCII);
|
||||
SearchPattern sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(15,sp.endsWith(d,0,d.length));
|
||||
|
||||
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("abcdefghijklmnopqrstuvwxyzabcdefghijklmno").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(0,sp.match(d,0,d.length));
|
||||
Assert.assertEquals(-1,sp.match(d,1,d.length-1));
|
||||
Assert.assertEquals(15,sp.endsWith(d,0,d.length));
|
||||
|
||||
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(0,sp.match(d,0,d.length));
|
||||
Assert.assertEquals(26,sp.match(d,1,d.length-1));
|
||||
Assert.assertEquals(26,sp.endsWith(d,0,d.length));
|
||||
|
||||
//test no match
|
||||
p = new String("hello world").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("there is definitely no match in here").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(0,sp.endsWith(d,0,d.length));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testStartsWith()
|
||||
{
|
||||
byte[] p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("ijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
SearchPattern sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(18,sp.match(d,0,d.length));
|
||||
Assert.assertEquals(-1,sp.match(d,19,d.length-19));
|
||||
Assert.assertEquals(18,sp.startsWith(d,0,d.length,8));
|
||||
|
||||
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("ijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(19,sp.match(d,0,d.length));
|
||||
Assert.assertEquals(-1,sp.match(d,20,d.length-20));
|
||||
Assert.assertEquals(18,sp.startsWith(d,0,d.length,8));
|
||||
|
||||
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(26,sp.startsWith(d,0,d.length,0));
|
||||
|
||||
//test no match
|
||||
p = new String("hello world").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("there is definitely no match in here").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(0,sp.startsWith(d,0,d.length,0));
|
||||
|
||||
//test large pattern small buffer
|
||||
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("mnopqrs").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(7,sp.startsWith(d,0,d.length,12));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue