Merge remote-tracking branch 'lachlan/jetty-9.4.x-1027-SearchPattern' into jetty-9.4.x-1027-Multipart

This commit is contained in:
Greg Wilkins 2018-03-08 15:05:35 +11:00
commit 25b8933d34
2 changed files with 351 additions and 0 deletions

View File

@ -0,0 +1,179 @@
//
// ========================================================================
// Copyright (c) 1995-2018 Mort Bay Consulting Pty. Ltd.
// ------------------------------------------------------------------------
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// and Apache License v2.0 which accompanies this distribution.
//
// The Eclipse Public License is available at
// http://www.eclipse.org/legal/epl-v10.html
//
// The Apache License v2.0 is available at
// http://www.opensource.org/licenses/apache2.0.php
//
// You may elect to redistribute this code under either of these licenses.
// ========================================================================
//
package org.eclipse.jetty.util;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
/**
 * SearchPattern
 * <p>
 * Fast search for patterns within strings and arrays of bytes.
 * Uses an implementation of the Boyer-Moore-Horspool algorithm
 * with a 256 character alphabet (one entry per possible byte value).
 * <p>
 * The algorithm has an average-case complexity of O(n)
 * on random text and O(nm) in the worst case,
 * where:
 * <ul>
 * <li>m = pattern length</li>
 * <li>n = length of data to search</li>
 * </ul>
 * Instances hold no mutable state after construction.
 */
public class SearchPattern
{
    static final int alphabetSize = 256;

    /** Bad-character shift table, indexed by the unsigned value (0..255) of a byte. */
    private final int[] table;

    /** Private copy of the pattern bytes being searched for. */
    private final byte[] pattern;

    /**
     * Produces a SearchPattern instance which can be used
     * to find matches of the pattern in data.
     *
     * @param pattern byte array containing the pattern; it is copied, so later
     * modification of the argument does not affect the returned instance
     * @return a new SearchPattern instance using the given pattern
     * @throws IllegalArgumentException if the pattern is empty
     */
    public static SearchPattern compile(byte[] pattern)
    {
        return new SearchPattern(Arrays.copyOf(pattern, pattern.length));
    }

    /**
     * Produces a SearchPattern instance which can be used
     * to find matches of the pattern in data.
     *
     * @param pattern string containing the pattern; it is converted to bytes
     * using {@link StandardCharsets#UTF_8}
     * @return a new SearchPattern instance using the given pattern
     * @throws IllegalArgumentException if the pattern is empty
     */
    public static SearchPattern compile(String pattern)
    {
        return new SearchPattern(pattern.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * @param pattern byte array containing the pattern used for matching;
     * the array is retained, so callers must pass a private copy
     */
    private SearchPattern(byte[] pattern)
    {
        if (pattern.length == 0)
            throw new IllegalArgumentException("Empty Pattern");
        this.pattern = pattern;

        // Build up the pre-processed table for this pattern: bytes that do not occur
        // in the pattern shift by the whole pattern length, bytes that do occur shift
        // by their distance from the end of the pattern (last byte excluded).
        table = new int[alphabetSize];
        Arrays.fill(table, pattern.length);
        for (int i = 0; i < pattern.length - 1; ++i)
        {
            // Java bytes are signed: mask to 0..255 so high-bit bytes do not
            // produce a negative array index.
            table[0xff & pattern[i]] = pattern.length - 1 - i;
        }
    }

    /**
     * Search for a complete match of the pattern within the data.
     *
     * @param data The data in which to search. The data may be arbitrary binary data;
     * it is compared byte-for-byte against the pattern bytes.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @return The index within the data array of the first instance of the pattern, or -1 if not found
     * @throws IllegalArgumentException if offset or length is negative or the range exceeds the data
     */
    public int match(byte[] data, int offset, int length)
    {
        validate(data, offset, length);

        int skip = offset;
        int last = offset + length - pattern.length;
        while (skip <= last)
        {
            // Compare right-to-left; reaching index 0 without a mismatch is a full match.
            for (int i = pattern.length - 1; data[skip + i] == pattern[i]; i--)
            {
                if (i == 0)
                    return skip;
            }

            // Shift according to the data byte aligned with the end of the pattern.
            skip += table[0xff & data[skip + pattern.length - 1]];
        }

        return -1;
    }

    /**
     * Search for a partial match of the pattern at the end of the data,
     * i.e. the longest prefix of the pattern that is a suffix of the searched range.
     *
     * @param data The data in which to search. The data may be arbitrary binary data;
     * it is compared byte-for-byte against the pattern bytes.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @return the length of the partial pattern matched and 0 for no match.
     * @throws IllegalArgumentException if offset or length is negative or the range exceeds the data
     */
    public int endsWith(byte[] data, int offset, int length)
    {
        validate(data, offset, length);

        int end = offset + length;
        int skip = (pattern.length <= length) ? (end - pattern.length) : offset;
        while (skip < end)
        {
            for (int i = (end - 1) - skip; data[skip + i] == pattern[i]; --i)
            {
                if (i == 0)
                    return end - skip;
            }

            // The shift table cannot be used here: the byte that would be aligned with
            // the end of the pattern lies beyond the searched range, so consulting it
            // could jump past a valid partial match. Advance one position at a time.
            skip++;
        }

        return 0;
    }

    /**
     * Search for a possibly partial match of the pattern at the start of the data.
     *
     * @param data The data in which to search. The data may be arbitrary binary data;
     * it is compared byte-for-byte against the pattern bytes.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @param matched The length of the partial pattern already matched; expected to be
     * between 0 and the pattern length
     * @return the number of additional pattern bytes matched at the start of the
     * searched range (not including {@code matched}) and 0 for no match.
     * @throws IllegalArgumentException if offset or length is negative or the range exceeds the data
     */
    public int startsWith(byte[] data, int offset, int length, int matched)
    {
        validate(data, offset, length);

        int remaining = pattern.length - matched;
        int matchedCount = 0;
        // Index relative to offset and bounded by length, so that the search
        // really starts at the requested offset and never leaves the range.
        while (matchedCount < remaining
            && matchedCount < length
            && data[offset + matchedCount] == pattern[matched + matchedCount])
        {
            matchedCount++;
        }

        return matchedCount;
    }

    /**
     * Performs legality checks for standard arguments input into SearchPattern methods.
     *
     * @param data The data in which to search.
     * @param offset The offset within the data to start the search
     * @param length The length of the data to search
     * @throws IllegalArgumentException if offset or length is negative or the range exceeds the data
     */
    private void validate(byte[] data, int offset, int length)
    {
        if (offset < 0)
            throw new IllegalArgumentException("offset was negative");
        else if (length < 0)
            throw new IllegalArgumentException("length was negative");
        else if (length > data.length - offset) // subtraction form cannot overflow
            throw new IllegalArgumentException("(offset+length) out of bounds of data[]");
    }
}

View File

@ -0,0 +1,172 @@
//
// ========================================================================
// Copyright (c) 1995-2018 Mort Bay Consulting Pty. Ltd.
// ------------------------------------------------------------------------
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// and Apache License v2.0 which accompanies this distribution.
//
// The Eclipse Public License is available at
// http://www.eclipse.org/legal/epl-v10.html
//
// The Apache License v2.0 is available at
// http://www.opensource.org/licenses/apache2.0.php
//
// You may elect to redistribute this code under either of these licenses.
// ========================================================================
//
package org.eclipse.jetty.util;
import java.nio.charset.StandardCharsets;
import org.junit.Assert;
import org.junit.Test;
/**
 * Unit tests for {@link SearchPattern}: full matches via {@code match},
 * trailing partial matches via {@code endsWith} and leading partial
 * matches via {@code startsWith}.
 */
public class SearchPatternTest
{
    /** Encodes a test literal as US-ASCII bytes. */
    private static byte[] ascii(String text)
    {
        return text.getBytes(StandardCharsets.US_ASCII);
    }

    @Test
    public void testBasicSearch()
    {
        byte[] truth = ascii("truth");
        byte[] evident = ascii("evident");
        byte[] we = ascii("we");
        byte[] text = ascii("we hold these truths to be self evident");

        // Pattern "truth": occurs once, in the middle of the text.
        SearchPattern truthPattern = SearchPattern.compile(truth);
        Assert.assertEquals(14, truthPattern.match(text, 0, text.length));
        Assert.assertEquals(14, truthPattern.match(text, 14, truth.length));
        Assert.assertEquals(14, truthPattern.match(text, 14, truth.length + 1));
        Assert.assertEquals(-1, truthPattern.match(text, 14, truth.length - 1));
        Assert.assertEquals(-1, truthPattern.match(text, 15, text.length - 15));

        // Pattern "evident": occurs once, at the very end of the text.
        SearchPattern evidentPattern = SearchPattern.compile(evident);
        Assert.assertEquals(32, evidentPattern.match(text, 0, text.length));
        Assert.assertEquals(32, evidentPattern.match(text, 32, evident.length));
        Assert.assertEquals(32, evidentPattern.match(text, 32, evident.length));
        Assert.assertEquals(-1, evidentPattern.match(text, 32, evident.length - 1));
        Assert.assertEquals(-1, evidentPattern.match(text, 33, text.length - 33));

        // Pattern "we": occurs once, at the very start of the text.
        SearchPattern wePattern = SearchPattern.compile(we);
        Assert.assertEquals(0, wePattern.match(text, 0, text.length));
        Assert.assertEquals(0, wePattern.match(text, 0, we.length));
        Assert.assertEquals(0, wePattern.match(text, 0, we.length + 1));
        Assert.assertEquals(-1, wePattern.match(text, 0, we.length - 1));
        Assert.assertEquals(-1, wePattern.match(text, 1, text.length - 1));
    }

    @Test
    public void testDoubleMatch()
    {
        byte[] needle = ascii("violent");
        byte[] haystack = ascii("These violent delights have violent ends.");
        SearchPattern search = SearchPattern.compile(needle);

        // The first occurrence wins; later start offsets find the second one.
        Assert.assertEquals(6, search.match(haystack, 0, haystack.length));
        Assert.assertEquals(-1, search.match(haystack, 6, needle.length - 1));
        Assert.assertEquals(28, search.match(haystack, 7, haystack.length - 7));
        Assert.assertEquals(28, search.match(haystack, 28, haystack.length - 28));
        Assert.assertEquals(-1, search.match(haystack, 29, haystack.length - 29));
    }

    @Test
    public void testAlmostMatch()
    {
        byte[] needle = ascii("violent");
        byte[] haystack = ascii("vio lent violen v iolent violin vioviolenlent viiolent");
        SearchPattern search = SearchPattern.compile(needle);

        // Many near misses, but no exact occurrence.
        Assert.assertEquals(-1, search.match(haystack, 0, haystack.length));
    }

    @Test
    public void testOddSizedPatterns()
    {
        // Pattern longer than the data can never match.
        byte[] needle = ascii("pneumonoultramicroscopicsilicovolcanoconiosis");
        byte[] haystack = ascii("pneumon");
        SearchPattern search = SearchPattern.compile(needle);
        Assert.assertEquals(-1, search.match(haystack, 0, haystack.length));

        // Single character pattern.
        needle = ascii("s");
        haystack = ascii("the cake is a lie");
        search = SearchPattern.compile(needle);
        Assert.assertEquals(10, search.match(haystack, 0, haystack.length));
    }

    @Test
    public void testEndsWith()
    {
        // Data is shorter than the pattern and is entirely a prefix of it.
        byte[] needle = ascii("pneumonoultramicroscopicsilicovolcanoconiosis");
        byte[] haystack = ascii("pneumonoultrami");
        SearchPattern search = SearchPattern.compile(needle);
        Assert.assertEquals(15, search.endsWith(haystack, 0, haystack.length));

        // One full occurrence followed by a partial one at the end.
        needle = ascii("abcdefghijklmnopqrstuvwxyz");
        haystack = ascii("abcdefghijklmnopqrstuvwxyzabcdefghijklmno");
        search = SearchPattern.compile(needle);
        Assert.assertEquals(0, search.match(haystack, 0, haystack.length));
        Assert.assertEquals(-1, search.match(haystack, 1, haystack.length - 1));
        Assert.assertEquals(15, search.endsWith(haystack, 0, haystack.length));

        // Two full occurrences; the data ends with the whole pattern.
        needle = ascii("abcdefghijklmnopqrstuvwxyz");
        haystack = ascii("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz");
        search = SearchPattern.compile(needle);
        Assert.assertEquals(0, search.match(haystack, 0, haystack.length));
        Assert.assertEquals(26, search.match(haystack, 1, haystack.length - 1));
        Assert.assertEquals(26, search.endsWith(haystack, 0, haystack.length));

        // No match at all.
        needle = ascii("hello world");
        haystack = ascii("there is definitely no match in here");
        search = SearchPattern.compile(needle);
        Assert.assertEquals(0, search.endsWith(haystack, 0, haystack.length));
    }

    @Test
    public void testStartsWith()
    {
        // Data begins with the tail of the pattern, immediately followed by a full occurrence.
        byte[] needle = ascii("abcdefghijklmnopqrstuvwxyz");
        byte[] haystack = ascii("ijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz");
        SearchPattern search = SearchPattern.compile(needle);
        Assert.assertEquals(18, search.match(haystack, 0, haystack.length));
        Assert.assertEquals(-1, search.match(haystack, 19, haystack.length - 19));
        Assert.assertEquals(18, search.startsWith(haystack, 0, haystack.length, 8));

        // Same, but a space separates the tail from the full occurrence.
        needle = ascii("abcdefghijklmnopqrstuvwxyz");
        haystack = ascii("ijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz");
        search = SearchPattern.compile(needle);
        Assert.assertEquals(19, search.match(haystack, 0, haystack.length));
        Assert.assertEquals(-1, search.match(haystack, 20, haystack.length - 20));
        Assert.assertEquals(18, search.startsWith(haystack, 0, haystack.length, 8));

        // Nothing matched previously; the data starts with the whole pattern.
        needle = ascii("abcdefghijklmnopqrstuvwxyz");
        haystack = ascii("abcdefghijklmnopqrstuvwxyz abcdefghijklmnopqrstuvwxyz");
        search = SearchPattern.compile(needle);
        Assert.assertEquals(26, search.startsWith(haystack, 0, haystack.length, 0));

        // No match at all.
        needle = ascii("hello world");
        haystack = ascii("there is definitely no match in here");
        search = SearchPattern.compile(needle);
        Assert.assertEquals(0, search.startsWith(haystack, 0, haystack.length, 0));

        // Large pattern, small buffer: the buffer runs out before the pattern does.
        needle = ascii("abcdefghijklmnopqrstuvwxyz");
        haystack = ascii("mnopqrs");
        search = SearchPattern.compile(needle);
        Assert.assertEquals(7, search.startsWith(haystack, 0, haystack.length, 12));
    }
}