RegexPathSpec documentation and MatchedPath improvements (#8163)

* More documentation

Signed-off-by: Joakim Erdfelt <joakim.erdfelt@gmail.com>
This commit is contained in:
Joakim Erdfelt 2022-06-16 13:53:04 -05:00 committed by GitHub
parent 1f902f6371
commit 1b4f941942
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 262 additions and 114 deletions

View File

@ -21,6 +21,77 @@ import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* <p>
* RegexPathSpec is a PathSpec implementation for a {@link PathMappings} instance.
* </p>
*
* <p>
* Supports the standard Java regex found in {@link java.util.regex.Pattern}.
* </p>
*
* <p>
* Supports {@link PathSpecGroup} for {@link PathSpecGroup#EXACT}, {@link PathSpecGroup#PREFIX_GLOB}, {@link PathSpecGroup#MIDDLE_GLOB}, and {@link PathSpecGroup#SUFFIX_GLOB}.
* This is done by evaluating the signature or the provided regex pattern for what is a literal vs a glob (of any kind).
* </p>
*
* <ul>
* <li>Only literals, it's a {@link PathSpecGroup#EXACT}.</li>
* <li>Starts with literals, ends with globs, it's a {@link PathSpecGroup#PREFIX_GLOB}</li>
* <li>Starts with glob, has at least 1 literal, then any thing else, it's a {@link PathSpecGroup#SUFFIX_GLOB}</li>
* <li>All other signatures are a {@link PathSpecGroup#MIDDLE_GLOB}</li>
* </ul>
*
* <p>
* The use of regex capture groups, regex character classes, regex quantifiers, and regex special contructs
* will be identified as a glob (for signature determination), all other characters are identified
* as literal. The regex {@code ^} beginning of line, and regex {@code $} end of line are ignored.
* </p>
*
* <p>
* <b>Support for {@link MatchedPath} and PathMatch vs PathInfo</b>
* </p>
*
* <p>
* There's a few steps in evaluating the matched input path for determining where the split
* in the input path should occur for {@link MatchedPath#getPathMatch()} and {@link MatchedPath#getPathInfo()}.
* </p>
*
* <ol>
* <li>
* If there are no regex capturing groups,
* the entire path is returned in {@link MatchedPath#getPathMatch()},
* and a null returned for {@link MatchedPath#getPathInfo()}
* </li>
* <li>
* If both the named regex capturing groups {@code name} and {@code info} are present, then
* the {@code name} group is returned in {@link MatchedPath#getPathMatch()} and the
* {@code info} group is returned in {@link MatchedPath#getPathInfo()}
* </li>
* <li>
* If there is only 1 regex capturing group
* <ul>
* <li>
* If the named regex capturing group {@code name} is present, the
* input path up to the end of the capturing group is returned
* in {@link MatchedPath#getPathMatch()} and any following characters (or null)
* are returned in {@link MatchedPath#getPathInfo()}
* </li>
* <li>
* other wise the input path up to the start of the capturing group is returned
* in {@link MatchedPath#getPathMatch()} and any following characters (or null)
* are returned in {@link MatchedPath#getPathInfo()}
* </li>
* </ul>
* If the split on pathMatch ends with {@code /} AND the pathInfo doesn't start with {@code /}
* then the slash is moved from pathMatch to pathInfo.
* </li>
* <li>
* All other RegexPathSpec signatures will return the entire path
* in {@link MatchedPath#getPathMatch()}, and a null returned for {@link MatchedPath#getPathInfo()}
* </li>
* </ol>
*/
public class RegexPathSpec extends AbstractPathSpec
{
private static final Logger LOG = LoggerFactory.getLogger(UriTemplatePathSpec.class);
@ -54,8 +125,9 @@ public class RegexPathSpec extends AbstractPathSpec
declaration = regex;
int specLength = declaration.length();
// build up a simple signature we can use to identify the grouping
boolean inTextList = false;
boolean inCharacterClass = false;
boolean inQuantifier = false;
boolean inCaptureGroup = false;
StringBuilder signature = new StringBuilder();
int pathDepth = 0;
@ -68,8 +140,6 @@ public class RegexPathSpec extends AbstractPathSpec
case '^': // ignore anchors
case '$': // ignore anchors
case '\'': // ignore escaping
case '(': // ignore grouping
case ')': // ignore grouping
break;
case '+': // single char quantifier
case '?': // single char quantifier
@ -78,25 +148,32 @@ public class RegexPathSpec extends AbstractPathSpec
case '.': // any char token
signature.append('g'); // glob
break;
case '{':
case '(': // in regex capture group
inCaptureGroup = true;
break;
case ')':
inCaptureGroup = false;
signature.append('g');
break;
case '{': // in regex quantifier
inQuantifier = true;
break;
case '}':
inQuantifier = false;
break;
case '[':
inTextList = true;
case '[': // in regex character class
inCharacterClass = true;
break;
case ']':
inTextList = false;
inCharacterClass = false;
signature.append('g'); // glob
break;
case '/':
if (!inTextList && !inQuantifier)
if (!inCharacterClass && !inQuantifier && !inCaptureGroup)
pathDepth++;
break;
default:
if (!inTextList && !inQuantifier && Character.isLetterOrDigit(c))
if (!inCharacterClass && !inQuantifier && !inCaptureGroup && Character.isLetterOrDigit(c))
{
if (last == '\\') // escaped
{
@ -135,9 +212,9 @@ public class RegexPathSpec extends AbstractPathSpec
String sig = signature.toString();
PathSpecGroup group;
if (Pattern.matches("^l*$", sig))
if (Pattern.matches("^l+$", sig))
group = PathSpecGroup.EXACT;
else if (Pattern.matches("^l*g+", sig))
else if (Pattern.matches("^l+g+", sig))
group = PathSpecGroup.PREFIX_GLOB;
else if (Pattern.matches("^g+l+.*", sig))
group = PathSpecGroup.SUFFIX_GLOB;
@ -193,44 +270,19 @@ public class RegexPathSpec extends AbstractPathSpec
@Override
public String getPathInfo(String path)
{
// Path Info only valid for PREFIX_GLOB types
if (_group == PathSpecGroup.PREFIX_GLOB)
{
Matcher matcher = getMatcher(path);
if (matcher.matches())
{
if (matcher.groupCount() >= 1)
{
String pathInfo = matcher.group(1);
if ("".equals(pathInfo))
return "/";
else
return pathInfo;
}
}
}
return null;
MatchedPath matched = matched(path);
if (matched == null)
return null;
return matched.getPathInfo();
}
@Override
public String getPathMatch(String path)
{
Matcher matcher = getMatcher(path);
if (matcher.matches())
{
if (_group == PathSpecGroup.PREFIX_GLOB && matcher.groupCount() >= 1)
{
int idx = matcher.start(1);
if (idx > 0)
{
if (path.charAt(idx - 1) == '/')
idx--;
return path.substring(0, idx);
}
}
return path;
}
return null;
MatchedPath matched = matched(path);
if (matched == null)
return "";
return matched.getPathMatch();
}
@Override
@ -277,74 +329,117 @@ public class RegexPathSpec extends AbstractPathSpec
{
private final RegexPathSpec pathSpec;
private final String path;
private final Matcher matcher;
private String pathMatch;
private String pathInfo;
public RegexMatchedPath(RegexPathSpec regexPathSpec, String path, Matcher matcher)
{
this.pathSpec = regexPathSpec;
this.path = path;
this.matcher = matcher;
calcPathMatchInfo(matcher);
}
private void calcPathMatchInfo(Matcher matcher)
{
int groupCount = matcher.groupCount();
if (groupCount == 0)
{
pathMatch = path;
pathInfo = null;
return;
}
if (groupCount == 1)
{
// we know we are splitting
int idxNameEnd = endOf(matcher, "name");
if (idxNameEnd >= 0)
{
pathMatch = path.substring(0, idxNameEnd);
pathInfo = path.substring(idxNameEnd);
// If split on pathMatch ends with '/'
// AND pathInfo doesn't have one, move the slash to pathInfo only move 1 level
if (pathMatch.length() > 0 && pathMatch.charAt(pathMatch.length() - 1) == '/' &&
!pathInfo.startsWith("/"))
{
pathMatch = pathMatch.substring(0, pathMatch.length() - 1);
pathInfo = '/' + pathInfo;
}
return;
}
// Use start of anonymous group
int idx = matcher.start(1);
if (idx >= 0)
{
pathMatch = path.substring(0, idx);
pathInfo = path.substring(idx);
if (pathMatch.length() > 0 && pathMatch.charAt(pathMatch.length() - 1) == '/' &&
!pathInfo.startsWith("/"))
{
pathMatch = pathMatch.substring(0, pathMatch.length() - 1);
pathInfo = '/' + pathInfo;
}
return;
}
}
// Reach here we have 2+ groups
String gName = valueOf(matcher, "name");
String gInfo = valueOf(matcher, "info");
// if both named groups exist
if (gName != null && gInfo != null)
{
pathMatch = gName;
pathInfo = gInfo;
return;
}
pathMatch = path;
pathInfo = null;
}
private String valueOf(Matcher matcher, String groupName)
{
try
{
return matcher.group(groupName);
}
catch (IllegalArgumentException notFound)
{
return null;
}
}
private int endOf(Matcher matcher, String groupName)
{
try
{
return matcher.end(groupName);
}
catch (IllegalArgumentException notFound)
{
return -2;
}
}
@Override
public String getPathMatch()
{
try
{
String p = matcher.group("name");
if (p != null)
{
return p;
}
}
catch (IllegalArgumentException ignore)
{
// ignore if group name not found.
}
if (pathSpec.getGroup() == PathSpecGroup.PREFIX_GLOB && matcher.groupCount() >= 1)
{
int idx = matcher.start(1);
if (idx > 0)
{
if (this.path.charAt(idx - 1) == '/')
idx--;
return this.path.substring(0, idx);
}
}
// default is the full path
return this.path;
return this.pathMatch;
}
@Override
public String getPathInfo()
{
try
{
String p = matcher.group("info");
if (p != null)
{
return p;
}
}
catch (IllegalArgumentException ignore)
{
// ignore if group info not found.
}
// Path Info only valid for PREFIX_GLOB
if (pathSpec.getGroup() == PathSpecGroup.PREFIX_GLOB && matcher.groupCount() >= 1)
{
String pathInfo = matcher.group(1);
if ("".equals(pathInfo))
return "/";
else
return pathInfo;
}
// default is null
return null;
return this.pathInfo;
}
@Override
@ -353,7 +448,6 @@ public class RegexPathSpec extends AbstractPathSpec
return getClass().getSimpleName() + "[" +
"pathSpec=" + pathSpec +
", path=\"" + path + "\"" +
", matcher=" + matcher +
']';
}
}

View File

@ -13,7 +13,12 @@
package org.eclipse.jetty.http.pathmap;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
@ -23,7 +28,6 @@ import static org.hamcrest.Matchers.nullValue;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
public class RegexPathSpecTest
{
@ -52,6 +56,9 @@ public class RegexPathSpecTest
assertNotMatches(spec, "/aa");
assertNotMatches(spec, "/a/");
assertThat(spec.getPathMatch("/a"), equalTo("/a"));
assertThat(spec.getPathInfo("/a"), nullValue());
}
@Test
@ -73,20 +80,61 @@ public class RegexPathSpecTest
assertNotMatches(spec, "/aa/bb");
assertNotMatches(spec, "/rest/admin/delete");
assertNotMatches(spec, "/rest/list");
assertThat(spec.getPathMatch("/rest/1.0/list"), equalTo("/rest"));
assertThat(spec.getPathInfo("/rest/1.0/list"), equalTo("/1.0/list"));
}
@Test
public void testPathInfo()
public static Stream<Arguments> matchedPathCases()
{
RegexPathSpec spec = new RegexPathSpec("^/test(/.*)?$");
assertTrue(spec.matches("/test/info"));
assertThat(spec.getPathMatch("/test/info"), equalTo("/test"));
assertThat(spec.getPathInfo("/test/info"), equalTo("/info"));
return Stream.of(
// Suffix (with optional capture group)
Arguments.of("^/[Tt]est(/.*)?$", "/test/info", "/test", "/info"),
Arguments.of("^/[Tt]est(/.*)?$", "/test", "/test", null),
Arguments.of("^(.*).do$", "/a.do", "", "/a.do"),
Arguments.of("^(.*).do$", "/a/b/c.do", "", "/a/b/c.do"),
Arguments.of("^(.*).do$", "/abcde.do", "", "/abcde.do"),
// Exact (no capture group)
Arguments.of("^/test/info$", "/test/info", "/test/info", null),
// Middle (with one capture group)
Arguments.of("^/rest/([^/]*)/list$", "/rest/api/list", "/rest", "/api/list"),
Arguments.of("^/rest/([^/]*)/list$", "/rest/1.0/list", "/rest", "/1.0/list"),
// Middle (with two capture groups)
Arguments.of("^/t(.*)/i(.*)$", "/test/info", "/test/info", null),
// Prefix (with optional capture group)
Arguments.of("^/test(/.*)?$", "/test/info", "/test", "/info"),
Arguments.of("^/a/(.*)?$", "/a/b/c/d", "/a", "/b/c/d"),
// Prefix (with many capture groups)
Arguments.of("^/test(/i.*)(/c.*)(/d.*)?$", "/test/info/code", "/test/info/code", null),
Arguments.of("^/test(/i.*)(/c.*)(/d.*)?$", "/test/info/code/data", "/test/info/code/data", null),
Arguments.of("^/test(/i.*)(/c.*)(/d.*)?$", "/test/ice/cream/dip", "/test/ice/cream/dip", null),
// Suffix (with only 1 named capture group of id "name")
Arguments.of("^(?<name>\\/.*)/.*\\.do$", "/test/info/code.do", "/test/info", "/code.do"),
Arguments.of("^(?<name>\\/.*)/.*\\.do$", "/a/b/c/d/e/f/g.do", "/a/b/c/d/e/f", "/g.do"),
// Suffix (with only 1 named capture group of id "info")
Arguments.of("^/.*(?<info>\\/.*\\.do)$", "/test/info/code.do", "/test/info", "/code.do"),
Arguments.of("^/.*(?<info>\\/.*\\.do)$", "/a/b/c/d/e/f/g.do", "/a/b/c/d/e/f", "/g.do"),
// Middle (with 2 named capture groups)
// this is pretty much an all glob signature
Arguments.of("^(?<name>\\/.*)(?<info>\\/.*\\.action)$", "/test/info/code.action", "/test/info", "/code.action"),
Arguments.of("^(?<name>\\/.*)(?<info>\\/.*\\.action)$", "/a/b/c/d/e/f/g.action", "/a/b/c/d/e/f", "/g.action"),
// Named groups with gap in the middle
Arguments.of("^(?<name>\\/.*)/x/(?<info>.*\\.action)$", "/a/b/c/x/e/f/g.action", "/a/b/c", "e/f/g.action"),
// Named groups in opposite order
Arguments.of("^(?<info>\\/.*)/x/(?<name>.*\\.action)$", "/a/b/c/x/e/f/g.action", "e/f/g.action", "/a/b/c")
);
}
spec = new RegexPathSpec("^/[Tt]est(/.*)?$");
assertTrue(spec.matches("/test/info"));
assertThat(spec.getPathMatch("/test/info"), equalTo("/test/info"));
assertThat(spec.getPathInfo("/test/info"), nullValue());
@ParameterizedTest(name = "[{index}] RegexPathSpec(\"{0}\").matched(\"{1}\")")
@MethodSource("matchedPathCases")
public void testMatchedPath(String regex, String input, String expectedPathMatch, String expectedPathInfo)
{
RegexPathSpec spec = new RegexPathSpec(regex);
MatchedPath matched = spec.matched(input);
assertEquals(expectedPathMatch, matched.getPathMatch(),
String.format("RegexPathSpec(\"%s\").matched(\"%s\").getPathMatch()", regex, input));
assertEquals(expectedPathInfo, matched.getPathInfo(),
String.format("RegexPathSpec(\"%s\").matched(\"%s\").getPathInfo()", regex, input));
}
@Test
@ -108,6 +156,9 @@ public class RegexPathSpecTest
assertNotMatches(spec, "/aa/bb");
assertNotMatches(spec, "/rest/admin/delete");
assertNotMatches(spec, "/rest/list");
assertThat(spec.getPathMatch("/rest/1.0/list"), equalTo("/rest/1.0/list"));
assertThat(spec.getPathInfo("/rest/1.0/list"), nullValue());
}
@Test
@ -126,6 +177,9 @@ public class RegexPathSpecTest
assertNotMatches(spec, "/a");
assertNotMatches(spec, "/aa");
assertNotMatches(spec, "/aa/bb");
assertThat(spec.getPathMatch("/a/b/c/d/e"), equalTo("/a"));
assertThat(spec.getPathInfo("/b/c/d/e"), nullValue());
}
@Test
@ -147,8 +201,8 @@ public class RegexPathSpecTest
assertNotMatches(spec, "/aa/bb");
assertNotMatches(spec, "/aa/bb.do/more");
assertThat(spec.getPathMatch("/a/b/c.do"), equalTo("/a/b/c.do"));
assertThat(spec.getPathInfo("/a/b/c.do"), nullValue());
assertThat(spec.getPathMatch("/a/b/c.do"), equalTo(""));
assertThat(spec.getPathInfo("/a/b/c.do"), equalTo("/a/b/c.do"));
}
/**

View File

@ -92,10 +92,10 @@ public class RegexServletTest
String response = _connector.getResponse("GET /ctx/forward/ignore HTTP/1.0\r\n\r\n");
assertThat(response, containsString(" 200 OK"));
assertThat(response, containsString("contextPath='/ctx'"));
assertThat(response, containsString("servletPath='/Test/info'"));
assertThat(response, containsString("pathInfo='null'"));
assertThat(response, containsString("servletPath='/Test'"));
assertThat(response, containsString("pathInfo='/info'"));
assertThat(response, containsString("mapping.mappingMatch='null'"));
assertThat(response, containsString("mapping.matchValue='Test/info'"));
assertThat(response, containsString("mapping.matchValue='Test'"));
assertThat(response, containsString("mapping.pattern='^/[Tt]est(/.*)?'"));
}