Merge remote-tracking branch 'lachlan/jetty-9.4.x-1027-SearchPattern' into jetty-9.4.x-1027-Multipart
This commit is contained in:
commit
25b8933d34
|
@ -0,0 +1,179 @@
|
|||
//
|
||||
// ========================================================================
|
||||
// Copyright (c) 1995-2018 Mort Bay Consulting Pty. Ltd.
|
||||
// ------------------------------------------------------------------------
|
||||
// All rights reserved. This program and the accompanying materials
|
||||
// are made available under the terms of the Eclipse Public License v1.0
|
||||
// and Apache License v2.0 which accompanies this distribution.
|
||||
//
|
||||
// The Eclipse Public License is available at
|
||||
// http://www.eclipse.org/legal/epl-v10.html
|
||||
//
|
||||
// The Apache License v2.0 is available at
|
||||
// http://www.opensource.org/licenses/apache2.0.php
|
||||
//
|
||||
// You may elect to redistribute this code under either of these licenses.
|
||||
// ========================================================================
|
||||
//
|
||||
|
||||
package org.eclipse.jetty.util;
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
|
||||
|
||||
/**
 * SearchPattern
 *
 * Fast search for patterns within strings and arrays of bytes.
 * Uses an implementation of the Boyer–Moore–Horspool algorithm
 * with a 256 character alphabet.
 *
 * The algorithm has an average-case complexity of O(n)
 * on random text and O(nm) in the worst case,
 * where:
 *   m = pattern length
 *   n = length of data to search
 *
 * Bytes are treated as unsigned values (0..255) when indexing the
 * skip table, so both the pattern and the data may contain arbitrary
 * binary content. Instances hold no state that changes after construction.
 */
public class SearchPattern
{
    static final int alphabetSize = 256;

    /** Skip distances indexed by unsigned byte value. */
    private final int[] table;
    /** The bytes to search for; never empty. */
    private final byte[] pattern;

    /**
     * Produces a SearchPattern instance which can be used
     * to find matches of the pattern in data
     * @param pattern byte array containing the pattern; it is copied, so later
     * changes to the array do not affect the returned instance
     * @return a new SearchPattern instance using the given pattern
     * @throws IllegalArgumentException if the pattern is empty
     */
    public static SearchPattern compile(byte[] pattern)
    {
        return new SearchPattern(Arrays.copyOf(pattern, pattern.length));
    }

    /**
     * Produces a SearchPattern instance which can be used
     * to find matches of the pattern in data
     * @param pattern string containing the pattern; it is encoded as
     * {@link StandardCharsets#UTF_8} bytes
     * @return a new SearchPattern instance using the given pattern
     * @throws IllegalArgumentException if the pattern is empty
     */
    public static SearchPattern compile(String pattern)
    {
        return new SearchPattern(pattern.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * @param pattern byte array containing the pattern used for matching
     * @throws IllegalArgumentException if the pattern is empty
     */
    private SearchPattern(byte[] pattern)
    {
        if (pattern.length == 0)
            throw new IllegalArgumentException("Empty Pattern");

        this.pattern = pattern;

        // Build up the pre-processed table for this pattern.
        // Every byte value defaults to a full pattern-length skip.
        table = new int[alphabetSize];
        Arrays.fill(table, pattern.length);

        // For each byte of the pattern except the last, record the distance from
        // its right-most occurrence to the end of the pattern. The byte is masked
        // to 0..255 because Java bytes are signed and would otherwise produce a
        // negative array index for values >= 0x80.
        for (int i = 0; i < pattern.length - 1; ++i)
            table[0xFF & pattern[i]] = pattern.length - 1 - i;
    }

    /**
     * Search for a complete match of the pattern within the data
     * @param data The data in which to search for. The data may be arbitrary binary data.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @return The index within the data array at which the first instance of the pattern or -1 if not found
     * @throws IllegalArgumentException if offset or length is negative or the
     * region they describe does not fit inside the data
     */
    public int match(byte[] data, int offset, int length)
    {
        validate(data, offset, length);

        int last = pattern.length - 1;
        // Highest start index at which the whole pattern still fits in the region.
        int limit = offset + length - pattern.length;

        int skip = offset;
        while (skip <= limit)
        {
            // Compare right-to-left; the loop can only end by mismatch or by
            // returning at i==0, so i never becomes negative.
            for (int i = last; data[skip + i] == pattern[i]; i--)
            {
                if (i == 0)
                    return skip;
            }

            // Shift according to the data byte aligned with the end of the pattern.
            skip += table[0xFF & data[skip + last]];
        }

        return -1;
    }

    /**
     * Search for a partial match of the pattern at the end of the data.
     * @param data The data in which to search for. The data may be arbitrary binary data.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @return the length of the partial pattern matched and 0 for no match.
     * @throws IllegalArgumentException if offset or length is negative or the
     * region they describe does not fit inside the data
     */
    public int endsWith(byte[] data, int offset, int length)
    {
        validate(data, offset, length);

        int end = offset + length;
        int skip = (pattern.length <= length) ? (end - pattern.length) : offset;
        while (skip < end)
        {
            // Compare the data from skip to the end of the region against the
            // start of the pattern, right-to-left.
            for (int i = (end - 1) - skip; data[skip + i] == pattern[i]; --i)
            {
                if (i == 0)
                    return end - skip;
            }

            // The skip table is only valid while the end of the pattern is aligned
            // with a byte inside the searched region. Bytes after the region are
            // not part of the search and must not influence the result.
            int aligned = skip + pattern.length - 1;
            if (aligned < end)
                skip += table[0xFF & data[aligned]];
            else
                skip++;
        }

        return 0;
    }

    /**
     * Search for a possibly partial match of the pattern at the start of the data.
     * @param data The data in which to search for. The data may be arbitrary binary data.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @param matched The length of the partial pattern already matched
     * @return the number of further pattern bytes (following the first {@code matched}
     * bytes of the pattern) that match consecutively from the start of the region,
     * and 0 for no match.
     * @throws IllegalArgumentException if offset or length is negative or the
     * region they describe does not fit inside the data
     */
    public int startsWith(byte[] data, int offset, int length, int matched)
    {
        validate(data, offset, length);

        // Number of pattern bytes that have not been matched yet.
        int remaining = pattern.length - matched;

        int matchedCount = 0;
        for (int i = 0; i < remaining && i < length; i++)
        {
            // The region starts at offset, so data is indexed relative to it.
            if (data[offset + i] != pattern[matched + i])
                break;
            matchedCount++;
        }

        return matchedCount;
    }

    /**
     * Performs legality checks for standard arguments input into SearchPattern methods.
     * @param data The data in which to search for.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @throws IllegalArgumentException if offset or length is negative or the
     * region they describe does not fit inside the data
     */
    private static void validate(byte[] data, int offset, int length)
    {
        if (offset < 0)
            throw new IllegalArgumentException("offset was negative");
        else if (length < 0)
            throw new IllegalArgumentException("length was negative");
        // Written as a subtraction so that a huge offset+length cannot overflow
        // int and slip past the check.
        else if (length > data.length - offset)
            throw new IllegalArgumentException("(offset+length) out of bounds of data[]");
    }
}
|
|
@ -0,0 +1,172 @@
|
|||
//
|
||||
// ========================================================================
|
||||
// Copyright (c) 1995-2018 Mort Bay Consulting Pty. Ltd.
|
||||
// ------------------------------------------------------------------------
|
||||
// All rights reserved. This program and the accompanying materials
|
||||
// are made available under the terms of the Eclipse Public License v1.0
|
||||
// and Apache License v2.0 which accompanies this distribution.
|
||||
//
|
||||
// The Eclipse Public License is available at
|
||||
// http://www.eclipse.org/legal/epl-v10.html
|
||||
//
|
||||
// The Apache License v2.0 is available at
|
||||
// http://www.opensource.org/licenses/apache2.0.php
|
||||
//
|
||||
// You may elect to redistribute this code under either of these licenses.
|
||||
// ========================================================================
|
||||
//
|
||||
|
||||
package org.eclipse.jetty.util;
|
||||
|
||||
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class SearchPatternTest
|
||||
|
||||
{
|
||||
|
||||
|
||||
@Test
|
||||
public void testBasicSearch()
|
||||
{
|
||||
byte[] p1 = new String("truth").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] p2 = new String("evident").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] p3 = new String("we").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("we hold these truths to be self evident").getBytes(StandardCharsets.US_ASCII);
|
||||
|
||||
// Testing Compiled Pattern p1 "truth"
|
||||
SearchPattern sp1 = SearchPattern.compile(p1);
|
||||
Assert.assertEquals(14,sp1.match(d, 0, d.length));
|
||||
Assert.assertEquals(14,sp1.match(d,14,p1.length));
|
||||
Assert.assertEquals(14,sp1.match(d,14,p1.length+1));
|
||||
Assert.assertEquals(-1,sp1.match(d,14,p1.length-1));
|
||||
Assert.assertEquals(-1,sp1.match(d,15,d.length-15));
|
||||
|
||||
// Testing Compiled Pattern p2 "evident"
|
||||
SearchPattern sp2 = SearchPattern.compile(p2);
|
||||
Assert.assertEquals(32,sp2.match(d, 0, d.length));
|
||||
Assert.assertEquals(32,sp2.match(d,32,p2.length));
|
||||
Assert.assertEquals(32,sp2.match(d,32,p2.length));
|
||||
Assert.assertEquals(-1,sp2.match(d,32,p2.length-1));
|
||||
Assert.assertEquals(-1,sp2.match(d,33,d.length-33));
|
||||
|
||||
// Testing Compiled Pattern p3 "evident"
|
||||
SearchPattern sp3 = SearchPattern.compile(p3);
|
||||
Assert.assertEquals( 0,sp3.match(d, 0, d.length));
|
||||
Assert.assertEquals( 0,sp3.match(d, 0, p3.length));
|
||||
Assert.assertEquals( 0,sp3.match(d, 0, p3.length+1));
|
||||
Assert.assertEquals(-1,sp3.match(d, 0, p3.length-1));
|
||||
Assert.assertEquals(-1,sp3.match(d, 1, d.length-1));
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDoubleMatch()
|
||||
{
|
||||
byte[] p = new String("violent").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("These violent delights have violent ends.").getBytes(StandardCharsets.US_ASCII);
|
||||
SearchPattern sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals( 6,sp.match(d, 0, d.length));
|
||||
Assert.assertEquals(-1,sp.match(d, 6, p.length-1));
|
||||
Assert.assertEquals(28,sp.match(d, 7, d.length-7));
|
||||
Assert.assertEquals(28,sp.match(d, 28, d.length-28));
|
||||
Assert.assertEquals(-1,sp.match(d, 29, d.length-29));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testAlmostMatch()
|
||||
{
|
||||
byte[] p = new String("violent").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("vio lent violen v iolent violin vioviolenlent viiolent").getBytes(StandardCharsets.US_ASCII);
|
||||
SearchPattern sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(-1,sp.match(d, 0, d.length));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testOddSizedPatterns()
|
||||
{
|
||||
// Test Large Pattern
|
||||
byte[] p = new String("pneumonoultramicroscopicsilicovolcanoconiosis").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("pneumon").getBytes(StandardCharsets.US_ASCII);
|
||||
SearchPattern sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(-1,sp.match(d, 0, d.length));
|
||||
|
||||
// Test Single Character Pattern
|
||||
p = new String("s").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("the cake is a lie").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(10,sp.match(d, 0, d.length));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testEndsWith()
|
||||
{
|
||||
byte[] p = new String("pneumonoultramicroscopicsilicovolcanoconiosis").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("pneumonoultrami").getBytes(StandardCharsets.US_ASCII);
|
||||
SearchPattern sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(15,sp.endsWith(d,0,d.length));
|
||||
|
||||
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("abcdefghijklmnopqrstuvwxyzabcdefghijklmno").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(0,sp.match(d,0,d.length));
|
||||
Assert.assertEquals(-1,sp.match(d,1,d.length-1));
|
||||
Assert.assertEquals(15,sp.endsWith(d,0,d.length));
|
||||
|
||||
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(0,sp.match(d,0,d.length));
|
||||
Assert.assertEquals(26,sp.match(d,1,d.length-1));
|
||||
Assert.assertEquals(26,sp.endsWith(d,0,d.length));
|
||||
|
||||
//test no match
|
||||
p = new String("hello world").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("there is definitely no match in here").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(0,sp.endsWith(d,0,d.length));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testStartsWith()
|
||||
{
|
||||
byte[] p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
byte[] d = new String("ijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
SearchPattern sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(18,sp.match(d,0,d.length));
|
||||
Assert.assertEquals(-1,sp.match(d,19,d.length-19));
|
||||
Assert.assertEquals(18,sp.startsWith(d,0,d.length,8));
|
||||
|
||||
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("ijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(19,sp.match(d,0,d.length));
|
||||
Assert.assertEquals(-1,sp.match(d,20,d.length-20));
|
||||
Assert.assertEquals(18,sp.startsWith(d,0,d.length,8));
|
||||
|
||||
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(26,sp.startsWith(d,0,d.length,0));
|
||||
|
||||
//test no match
|
||||
p = new String("hello world").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("there is definitely no match in here").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(0,sp.startsWith(d,0,d.length,0));
|
||||
|
||||
//test large pattern small buffer
|
||||
p = new String("abcdefghijklmnopqrstuvwxyz").getBytes(StandardCharsets.US_ASCII);
|
||||
d = new String("mnopqrs").getBytes(StandardCharsets.US_ASCII);
|
||||
sp = SearchPattern.compile(p);
|
||||
Assert.assertEquals(7,sp.startsWith(d,0,d.length,12));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue