Merge remote-tracking branch 'lachlan/jetty-9.4.x-1027-SearchPattern' into jetty-9.4.x-1027-Multipart
This commit is contained in:
commit
25b8933d34
|
@ -0,0 +1,179 @@
|
||||||
|
//
|
||||||
|
// ========================================================================
|
||||||
|
// Copyright (c) 1995-2018 Mort Bay Consulting Pty. Ltd.
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// All rights reserved. This program and the accompanying materials
|
||||||
|
// are made available under the terms of the Eclipse Public License v1.0
|
||||||
|
// and Apache License v2.0 which accompanies this distribution.
|
||||||
|
//
|
||||||
|
// The Eclipse Public License is available at
|
||||||
|
// http://www.eclipse.org/legal/epl-v10.html
|
||||||
|
//
|
||||||
|
// The Apache License v2.0 is available at
|
||||||
|
// http://www.opensource.org/licenses/apache2.0.php
|
||||||
|
//
|
||||||
|
// You may elect to redistribute this code under either of these licenses.
|
||||||
|
// ========================================================================
|
||||||
|
//
|
||||||
|
|
||||||
|
package org.eclipse.jetty.util;
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * SearchPattern
 *
 * <p>Fast search for a fixed byte pattern within arrays of bytes.
 * Uses an implementation of the Boyer-Moore-Horspool algorithm
 * with a 256 character alphabet (one entry per possible unsigned byte value).</p>
 *
 * <p>The algorithm has an average-case complexity of O(n) on random text
 * and O(nm) in the worst case, where:</p>
 * <ul>
 *   <li>m = pattern length</li>
 *   <li>n = length of data to search</li>
 * </ul>
 *
 * <p>Both fields are assigned once in the constructor and never modified afterwards,
 * and the byte-array factory copies its argument, so an instance does not change
 * after it has been compiled.</p>
 */
public class SearchPattern
{
    static final int alphabetSize = 256;

    /** Horspool shift table, indexed by the unsigned value (0..255) of a byte. */
    private final int[] table;

    /** The bytes of the pattern being searched for; never empty. */
    private final byte[] pattern;

    /**
     * Produces a SearchPattern instance which can be used
     * to find matches of the pattern in data.
     *
     * @param pattern byte array containing the pattern; it is copied, so later
     *                changes to the caller's array do not affect the compiled pattern
     * @return a new SearchPattern instance using the given pattern
     * @throws IllegalArgumentException if the pattern is empty
     */
    public static SearchPattern compile(byte[] pattern)
    {
        return new SearchPattern(Arrays.copyOf(pattern, pattern.length));
    }

    /**
     * Produces a SearchPattern instance which can be used
     * to find matches of the pattern in data.
     *
     * @param pattern string containing the pattern; it is converted to bytes
     *                using {@link StandardCharsets#UTF_8}
     * @return a new SearchPattern instance using the given pattern
     * @throws IllegalArgumentException if the pattern is empty
     */
    public static SearchPattern compile(String pattern)
    {
        return new SearchPattern(pattern.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * @param pattern byte array containing the pattern used for matching
     * @throws IllegalArgumentException if the pattern is empty
     */
    private SearchPattern(byte[] pattern)
    {
        if (pattern.length == 0)
        {
            throw new IllegalArgumentException("Empty Pattern");
        }

        this.pattern = pattern;

        // Build up the pre-processed table for this pattern: by default a mismatch
        // shifts by the whole pattern length; for every byte that occurs in the
        // pattern (except in the last position) the shift is the distance from its
        // right-most such occurrence to the end of the pattern.
        table = new int[alphabetSize];
        Arrays.fill(table, pattern.length);
        for (int i = 0; i < pattern.length - 1; ++i)
        {
            // Java bytes are signed: mask to 0..255 so values >= 0x80 do not
            // produce a negative array index.
            table[0xff & pattern[i]] = pattern.length - 1 - i;
        }
    }

    /**
     * Search for a complete match of the pattern within the data.
     *
     * @param data The data in which to search. The data may be arbitrary binary data.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @return The index within the data array of the first instance of the pattern,
     *         or -1 if not found
     * @throws IllegalArgumentException if offset or length is negative, or if
     *         offset+length exceeds the length of data
     */
    public int match(byte[] data, int offset, int length)
    {
        validate(data, offset, length);

        // Last index at which a full pattern still fits inside the search window.
        int lastStart = offset + length - pattern.length;

        int skip = offset;
        while (skip <= lastStart)
        {
            // Compare right-to-left; reaching index 0 means every byte matched.
            for (int i = pattern.length - 1; data[skip + i] == pattern[i]; i--)
            {
                if (i == 0)
                {
                    return skip;
                }
            }

            // Shift according to the byte aligned with the last pattern position.
            skip += table[0xff & data[skip + pattern.length - 1]];
        }

        return -1;
    }

    /**
     * Search for a partial match of the pattern at the end of the data.
     *
     * @param data The data in which to search. The data may be arbitrary binary data.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @return the length of the longest prefix of the pattern that the searched
     *         region ends with (the full pattern length if the region ends with the
     *         whole pattern) and 0 for no match.
     * @throws IllegalArgumentException if offset or length is negative, or if
     *         offset+length exceeds the length of data
     */
    public int endsWith(byte[] data, int offset, int length)
    {
        validate(data, offset, length);

        int end = offset + length;
        int skip = (pattern.length <= length) ? (end - pattern.length) : offset;
        while (skip < end)
        {
            // Compare the part of the pattern that fits between skip and the end
            // of the region, right-to-left.
            for (int i = (end - 1) - skip; data[skip + i] == pattern[i]; --i)
            {
                if (i == 0)
                {
                    return end - skip;
                }
            }

            // The shift table is only valid while the whole pattern is aligned
            // inside the searched region. Bytes beyond offset+length are not part
            // of the search and must not influence the result, so once the pattern
            // overhangs the end of the region advance one position at a time.
            if (skip + pattern.length - 1 < end)
            {
                skip += table[0xff & data[skip + pattern.length - 1]];
            }
            else
            {
                skip++;
            }
        }

        return 0;
    }

    /**
     * Search for a possibly partial match of the pattern at the start of the data.
     *
     * @param data The data in which to search. The data may be arbitrary binary data.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @param matched The length of the partial pattern already matched; comparison
     *                starts at this index of the pattern. Expected to be between 0
     *                and the pattern length.
     * @return the number of bytes at the start of the searched region that continue
     *         the pattern from position {@code matched}, and 0 for no match.
     * @throws IllegalArgumentException if offset or length is negative, or if
     *         offset+length exceeds the length of data
     */
    public int startsWith(byte[] data, int offset, int length, int matched)
    {
        validate(data, offset, length);

        // Never look past the remaining pattern or past the searched region.
        int limit = Math.min(pattern.length - matched, length);

        int matchedCount = 0;
        for (int i = 0; i < limit; i++)
        {
            // The region starts at offset, not at index 0 of the array.
            if (data[offset + i] != pattern[i + matched])
            {
                break;
            }
            matchedCount++;
        }

        return matchedCount;
    }

    /**
     * Performs legality checks for standard arguments input into SearchPattern methods.
     *
     * @param data The data in which to search.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @throws IllegalArgumentException if offset or length is negative, or if
     *         offset+length exceeds the length of data
     */
    private void validate(byte[] data, int offset, int length)
    {
        if (offset < 0)
        {
            throw new IllegalArgumentException("offset was negative");
        }
        if (length < 0)
        {
            throw new IllegalArgumentException("length was negative");
        }
        // Written as a subtraction so that a huge offset+length cannot overflow
        // int and slip past the check.
        if (length > data.length - offset)
        {
            throw new IllegalArgumentException("(offset+length) out of bounds of data[]");
        }
    }
}
|
|
@ -0,0 +1,172 @@
|
||||||
|
//
|
||||||
|
// ========================================================================
|
||||||
|
// Copyright (c) 1995-2018 Mort Bay Consulting Pty. Ltd.
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// All rights reserved. This program and the accompanying materials
|
||||||
|
// are made available under the terms of the Eclipse Public License v1.0
|
||||||
|
// and Apache License v2.0 which accompanies this distribution.
|
||||||
|
//
|
||||||
|
// The Eclipse Public License is available at
|
||||||
|
// http://www.eclipse.org/legal/epl-v10.html
|
||||||
|
//
|
||||||
|
// The Apache License v2.0 is available at
|
||||||
|
// http://www.opensource.org/licenses/apache2.0.php
|
||||||
|
//
|
||||||
|
// You may elect to redistribute this code under either of these licenses.
|
||||||
|
// ========================================================================
|
||||||
|
//
|
||||||
|
|
||||||
|
package org.eclipse.jetty.util;
|
||||||
|
|
||||||
|
|
||||||
|
import java.nio.charset.StandardCharsets;
|
||||||
|
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class SearchPatternTest
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testBasicSearch()
|
||||||
|
{
|
||||||
|
byte[] p1 = new String("truth").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
byte[] p2 = new String("evident").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
byte[] p3 = new String("we").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
byte[] d = new String("we hold these truths to be self evident").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
|
||||||
|
// Testing Compiled Pattern p1 "truth"
|
||||||
|
SearchPattern sp1 = SearchPattern.compile(p1);
|
||||||
|
Assert.assertEquals(14,sp1.match(d, 0, d.length));
|
||||||
|
Assert.assertEquals(14,sp1.match(d,14,p1.length));
|
||||||
|
Assert.assertEquals(14,sp1.match(d,14,p1.length+1));
|
||||||
|
Assert.assertEquals(-1,sp1.match(d,14,p1.length-1));
|
||||||
|
Assert.assertEquals(-1,sp1.match(d,15,d.length-15));
|
||||||
|
|
||||||
|
// Testing Compiled Pattern p2 "evident"
|
||||||
|
SearchPattern sp2 = SearchPattern.compile(p2);
|
||||||
|
Assert.assertEquals(32,sp2.match(d, 0, d.length));
|
||||||
|
Assert.assertEquals(32,sp2.match(d,32,p2.length));
|
||||||
|
Assert.assertEquals(32,sp2.match(d,32,p2.length));
|
||||||
|
Assert.assertEquals(-1,sp2.match(d,32,p2.length-1));
|
||||||
|
Assert.assertEquals(-1,sp2.match(d,33,d.length-33));
|
||||||
|
|
||||||
|
// Testing Compiled Pattern p3 "evident"
|
||||||
|
SearchPattern sp3 = SearchPattern.compile(p3);
|
||||||
|
Assert.assertEquals( 0,sp3.match(d, 0, d.length));
|
||||||
|
Assert.assertEquals( 0,sp3.match(d, 0, p3.length));
|
||||||
|
Assert.assertEquals( 0,sp3.match(d, 0, p3.length+1));
|
||||||
|
Assert.assertEquals(-1,sp3.match(d, 0, p3.length-1));
|
||||||
|
Assert.assertEquals(-1,sp3.match(d, 1, d.length-1));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDoubleMatch()
|
||||||
|
{
|
||||||
|
byte[] p = new String("violent").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
byte[] d = new String("These violent delights have violent ends.").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
SearchPattern sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals( 6,sp.match(d, 0, d.length));
|
||||||
|
Assert.assertEquals(-1,sp.match(d, 6, p.length-1));
|
||||||
|
Assert.assertEquals(28,sp.match(d, 7, d.length-7));
|
||||||
|
Assert.assertEquals(28,sp.match(d, 28, d.length-28));
|
||||||
|
Assert.assertEquals(-1,sp.match(d, 29, d.length-29));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAlmostMatch()
|
||||||
|
{
|
||||||
|
byte[] p = new String("violent").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
byte[] d = new String("vio lent violen v iolent violin vioviolenlent viiolent").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
SearchPattern sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(-1,sp.match(d, 0, d.length));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testOddSizedPatterns()
|
||||||
|
{
|
||||||
|
// Test Large Pattern
|
||||||
|
byte[] p = new String("pneumonoultramicroscopicsilicovolcanoconiosis").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
byte[] d = new String("pneumon").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
SearchPattern sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(-1,sp.match(d, 0, d.length));
|
||||||
|
|
||||||
|
// Test Single Character Pattern
|
||||||
|
p = new String("s").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
d = new String("the cake is a lie").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(10,sp.match(d, 0, d.length));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEndsWith()
|
||||||
|
{
|
||||||
|
byte[] p = new String("pneumonoultramicroscopicsilicovolcanoconiosis").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
byte[] d = new String("pneumonoultrami").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
SearchPattern sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(15,sp.endsWith(d,0,d.length));
|
||||||
|
|
||||||
|
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
d = new String("abcdefghijklmnopqrstuvwxyzabcdefghijklmno").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(0,sp.match(d,0,d.length));
|
||||||
|
Assert.assertEquals(-1,sp.match(d,1,d.length-1));
|
||||||
|
Assert.assertEquals(15,sp.endsWith(d,0,d.length));
|
||||||
|
|
||||||
|
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
d = new String("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(0,sp.match(d,0,d.length));
|
||||||
|
Assert.assertEquals(26,sp.match(d,1,d.length-1));
|
||||||
|
Assert.assertEquals(26,sp.endsWith(d,0,d.length));
|
||||||
|
|
||||||
|
//test no match
|
||||||
|
p = new String("hello world").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
d = new String("there is definitely no match in here").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(0,sp.endsWith(d,0,d.length));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testStartsWith()
|
||||||
|
{
|
||||||
|
byte[] p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
byte[] d = new String("ijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
SearchPattern sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(18,sp.match(d,0,d.length));
|
||||||
|
Assert.assertEquals(-1,sp.match(d,19,d.length-19));
|
||||||
|
Assert.assertEquals(18,sp.startsWith(d,0,d.length,8));
|
||||||
|
|
||||||
|
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
d = new String("ijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(19,sp.match(d,0,d.length));
|
||||||
|
Assert.assertEquals(-1,sp.match(d,20,d.length-20));
|
||||||
|
Assert.assertEquals(18,sp.startsWith(d,0,d.length,8));
|
||||||
|
|
||||||
|
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
d = new String("abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(26,sp.startsWith(d,0,d.length,0));
|
||||||
|
|
||||||
|
//test no match
|
||||||
|
p = new String("hello world").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
d = new String("there is definitely no match in here").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(0,sp.startsWith(d,0,d.length,0));
|
||||||
|
|
||||||
|
//test large pattern small buffer
|
||||||
|
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
d = new String("mnopqrs").getBytes(StandardCharsets.US_ASCII);
|
||||||
|
sp = SearchPattern.compile(p);
|
||||||
|
Assert.assertEquals(7,sp.startsWith(d,0,d.length,12));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue