From 1b4f94194242c024d9a4ee27de343596b74b6e33 Mon Sep 17 00:00:00 2001 From: Joakim Erdfelt Date: Thu, 16 Jun 2022 13:53:04 -0500 Subject: [PATCH] RegexPathSpec documentation and MatchedPath improvements (#8163) * More documentation Signed-off-by: Joakim Erdfelt --- .../jetty/http/pathmap/RegexPathSpec.java | 290 ++++++++++++------ .../jetty/http/pathmap/RegexPathSpecTest.java | 80 ++++- .../jetty/servlet/RegexServletTest.java | 6 +- 3 files changed, 262 insertions(+), 114 deletions(-) diff --git a/jetty-http/src/main/java/org/eclipse/jetty/http/pathmap/RegexPathSpec.java b/jetty-http/src/main/java/org/eclipse/jetty/http/pathmap/RegexPathSpec.java index b65601d738c..f58645e212c 100644 --- a/jetty-http/src/main/java/org/eclipse/jetty/http/pathmap/RegexPathSpec.java +++ b/jetty-http/src/main/java/org/eclipse/jetty/http/pathmap/RegexPathSpec.java @@ -21,6 +21,77 @@ import java.util.regex.Pattern; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +/** + *

+ * RegexPathSpec is a PathSpec implementation for a {@link PathMappings} instance. + *

+ * + *

+ * Supports the standard Java regex found in {@link java.util.regex.Pattern}. + *

+ * + *

+ * Supports {@link PathSpecGroup} for {@link PathSpecGroup#EXACT}, {@link PathSpecGroup#PREFIX_GLOB}, {@link PathSpecGroup#MIDDLE_GLOB}, and {@link PathSpecGroup#SUFFIX_GLOB}. + * This is done by evaluating the signature or the provided regex pattern for what is a literal vs a glob (of any kind). + *

+ * + * + * + *

+ * The use of regex capture groups, regex character classes, regex quantifiers, and regex special contructs + * will be identified as a glob (for signature determination), all other characters are identified + * as literal. The regex {@code ^} beginning of line, and regex {@code $} end of line are ignored. + *

+ * + *

+ * Support for {@link MatchedPath} and PathMatch vs PathInfo + *

+ * + *

+ * There's a few steps in evaluating the matched input path for determining where the split + * in the input path should occur for {@link MatchedPath#getPathMatch()} and {@link MatchedPath#getPathInfo()}. + *

+ * + *
    + *
  1. + * If there are no regex capturing groups, + * the entire path is returned in {@link MatchedPath#getPathMatch()}, + * and a null returned for {@link MatchedPath#getPathInfo()} + *
  2. + *
  3. + * If both the named regex capturing groups {@code name} and {@code info} are present, then + * the {@code name} group is returned in {@link MatchedPath#getPathMatch()} and the + * {@code info} group is returned in {@link MatchedPath#getPathInfo()} + *
  4. + *
  5. + * If there is only 1 regex capturing group + *
      + *
    • + * If the named regex capturing group {@code name} is present, the + * input path up to the end of the capturing group is returned + * in {@link MatchedPath#getPathMatch()} and any following characters (or null) + * are returned in {@link MatchedPath#getPathInfo()} + *
    • + *
    • + * other wise the input path up to the start of the capturing group is returned + * in {@link MatchedPath#getPathMatch()} and any following characters (or null) + * are returned in {@link MatchedPath#getPathInfo()} + *
    • + *
    + * If the split on pathMatch ends with {@code /} AND the pathInfo doesn't start with {@code /} + * then the slash is moved from pathMatch to pathInfo. + *
  6. + *
  7. + * All other RegexPathSpec signatures will return the entire path + * in {@link MatchedPath#getPathMatch()}, and a null returned for {@link MatchedPath#getPathInfo()} + *
  8. + *
+ */ public class RegexPathSpec extends AbstractPathSpec { private static final Logger LOG = LoggerFactory.getLogger(UriTemplatePathSpec.class); @@ -54,8 +125,9 @@ public class RegexPathSpec extends AbstractPathSpec declaration = regex; int specLength = declaration.length(); // build up a simple signature we can use to identify the grouping - boolean inTextList = false; + boolean inCharacterClass = false; boolean inQuantifier = false; + boolean inCaptureGroup = false; StringBuilder signature = new StringBuilder(); int pathDepth = 0; @@ -68,8 +140,6 @@ public class RegexPathSpec extends AbstractPathSpec case '^': // ignore anchors case '$': // ignore anchors case '\'': // ignore escaping - case '(': // ignore grouping - case ')': // ignore grouping break; case '+': // single char quantifier case '?': // single char quantifier @@ -78,25 +148,32 @@ public class RegexPathSpec extends AbstractPathSpec case '.': // any char token signature.append('g'); // glob break; - case '{': + case '(': // in regex capture group + inCaptureGroup = true; + break; + case ')': + inCaptureGroup = false; + signature.append('g'); + break; + case '{': // in regex quantifier inQuantifier = true; break; case '}': inQuantifier = false; break; - case '[': - inTextList = true; + case '[': // in regex character class + inCharacterClass = true; break; case ']': - inTextList = false; + inCharacterClass = false; signature.append('g'); // glob break; case '/': - if (!inTextList && !inQuantifier) + if (!inCharacterClass && !inQuantifier && !inCaptureGroup) pathDepth++; break; default: - if (!inTextList && !inQuantifier && Character.isLetterOrDigit(c)) + if (!inCharacterClass && !inQuantifier && !inCaptureGroup && Character.isLetterOrDigit(c)) { if (last == '\\') // escaped { @@ -135,9 +212,9 @@ public class RegexPathSpec extends AbstractPathSpec String sig = signature.toString(); PathSpecGroup group; - if (Pattern.matches("^l*$", sig)) + if (Pattern.matches("^l+$", sig)) group = PathSpecGroup.EXACT; - else if (Pattern.matches("^l*g+", sig)) + else if (Pattern.matches("^l+g+", sig)) group = PathSpecGroup.PREFIX_GLOB; else if (Pattern.matches("^g+l+.*", sig)) group = PathSpecGroup.SUFFIX_GLOB; @@ -193,44 +270,19 @@ public class RegexPathSpec extends AbstractPathSpec @Override public String getPathInfo(String path) { - // Path Info only valid for PREFIX_GLOB types - if (_group == PathSpecGroup.PREFIX_GLOB) - { - Matcher matcher = getMatcher(path); - if (matcher.matches()) - { - if (matcher.groupCount() >= 1) - { - String pathInfo = matcher.group(1); - if ("".equals(pathInfo)) - return "/"; - else - return pathInfo; - } - } - } - return null; + MatchedPath matched = matched(path); + if (matched == null) + return null; + return matched.getPathInfo(); } @Override public String getPathMatch(String path) { - Matcher matcher = getMatcher(path); - if (matcher.matches()) - { - if (_group == PathSpecGroup.PREFIX_GLOB && matcher.groupCount() >= 1) - { - int idx = matcher.start(1); - if (idx > 0) - { - if (path.charAt(idx - 1) == '/') - idx--; - return path.substring(0, idx); - } - } - return path; - } - return null; + MatchedPath matched = matched(path); + if (matched == null) + return ""; + return matched.getPathMatch(); } @Override @@ -277,74 +329,117 @@ public class RegexPathSpec extends AbstractPathSpec { private final RegexPathSpec pathSpec; private final String path; - private final Matcher matcher; + private String pathMatch; + private String pathInfo; public RegexMatchedPath(RegexPathSpec regexPathSpec, String path, Matcher matcher) { this.pathSpec = regexPathSpec; this.path = path; - this.matcher = matcher; + + calcPathMatchInfo(matcher); + } + + private void calcPathMatchInfo(Matcher matcher) + { + int groupCount = matcher.groupCount(); + + + if (groupCount == 0) + { + pathMatch = path; + pathInfo = null; + return; + } + + if (groupCount == 1) + { + // we know we are splitting + int idxNameEnd = endOf(matcher, "name"); + if (idxNameEnd >= 0) + { + pathMatch = path.substring(0, idxNameEnd); + pathInfo = path.substring(idxNameEnd); + + // If split on pathMatch ends with '/' + // AND pathInfo doesn't have one, move the slash to pathInfo only move 1 level + if (pathMatch.length() > 0 && pathMatch.charAt(pathMatch.length() - 1) == '/' && + !pathInfo.startsWith("/")) + { + pathMatch = pathMatch.substring(0, pathMatch.length() - 1); + pathInfo = '/' + pathInfo; + } + return; + } + + // Use start of anonymous group + int idx = matcher.start(1); + if (idx >= 0) + { + pathMatch = path.substring(0, idx); + pathInfo = path.substring(idx); + + if (pathMatch.length() > 0 && pathMatch.charAt(pathMatch.length() - 1) == '/' && + !pathInfo.startsWith("/")) + { + pathMatch = pathMatch.substring(0, pathMatch.length() - 1); + pathInfo = '/' + pathInfo; + } + return; + } + } + + // Reach here we have 2+ groups + + String gName = valueOf(matcher, "name"); + String gInfo = valueOf(matcher, "info"); + + // if both named groups exist + if (gName != null && gInfo != null) + { + pathMatch = gName; + pathInfo = gInfo; + return; + } + + pathMatch = path; + pathInfo = null; + } + + private String valueOf(Matcher matcher, String groupName) + { + try + { + return matcher.group(groupName); + } + catch (IllegalArgumentException notFound) + { + return null; + } + } + + private int endOf(Matcher matcher, String groupName) + { + try + { + return matcher.end(groupName); + } + catch (IllegalArgumentException notFound) + { + return -2; + } } @Override public String getPathMatch() { - try - { - String p = matcher.group("name"); - if (p != null) - { - return p; - } - } - catch (IllegalArgumentException ignore) - { - // ignore if group name not found. - } - - if (pathSpec.getGroup() == PathSpecGroup.PREFIX_GLOB && matcher.groupCount() >= 1) - { - int idx = matcher.start(1); - if (idx > 0) - { - if (this.path.charAt(idx - 1) == '/') - idx--; - return this.path.substring(0, idx); - } - } - - // default is the full path - return this.path; + return this.pathMatch; } @Override public String getPathInfo() { - try - { - String p = matcher.group("info"); - if (p != null) - { - return p; - } - } - catch (IllegalArgumentException ignore) - { - // ignore if group info not found. - } - - // Path Info only valid for PREFIX_GLOB - if (pathSpec.getGroup() == PathSpecGroup.PREFIX_GLOB && matcher.groupCount() >= 1) - { - String pathInfo = matcher.group(1); - if ("".equals(pathInfo)) - return "/"; - else - return pathInfo; - } - - // default is null - return null; + return this.pathInfo; } @Override @@ -353,7 +448,6 @@ public class RegexPathSpec extends AbstractPathSpec return getClass().getSimpleName() + "[" + "pathSpec=" + pathSpec + ", path=\"" + path + "\"" + - ", matcher=" + matcher + ']'; } } diff --git a/jetty-http/src/test/java/org/eclipse/jetty/http/pathmap/RegexPathSpecTest.java b/jetty-http/src/test/java/org/eclipse/jetty/http/pathmap/RegexPathSpecTest.java index 79ff15c97b3..474b909acd2 100644 --- a/jetty-http/src/test/java/org/eclipse/jetty/http/pathmap/RegexPathSpecTest.java +++ b/jetty-http/src/test/java/org/eclipse/jetty/http/pathmap/RegexPathSpecTest.java @@ -13,7 +13,12 @@ package org.eclipse.jetty.http.pathmap; +import java.util.stream.Stream; + import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -23,7 +28,6 @@ import static org.hamcrest.Matchers.nullValue; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; -import static org.junit.jupiter.api.Assertions.assertTrue; public class RegexPathSpecTest { @@ -52,6 +56,9 @@ public class RegexPathSpecTest assertNotMatches(spec, "/aa"); assertNotMatches(spec, "/a/"); + + assertThat(spec.getPathMatch("/a"), equalTo("/a")); + assertThat(spec.getPathInfo("/a"), nullValue()); } @Test @@ -73,20 +80,61 @@ public class RegexPathSpecTest assertNotMatches(spec, "/aa/bb"); assertNotMatches(spec, "/rest/admin/delete"); assertNotMatches(spec, "/rest/list"); + + assertThat(spec.getPathMatch("/rest/1.0/list"), equalTo("/rest")); + assertThat(spec.getPathInfo("/rest/1.0/list"), equalTo("/1.0/list")); } - @Test - public void testPathInfo() + public static Stream matchedPathCases() { - RegexPathSpec spec = new RegexPathSpec("^/test(/.*)?$"); - assertTrue(spec.matches("/test/info")); - assertThat(spec.getPathMatch("/test/info"), equalTo("/test")); - assertThat(spec.getPathInfo("/test/info"), equalTo("/info")); + return Stream.of( + // Suffix (with optional capture group) + Arguments.of("^/[Tt]est(/.*)?$", "/test/info", "/test", "/info"), + Arguments.of("^/[Tt]est(/.*)?$", "/test", "/test", null), + Arguments.of("^(.*).do$", "/a.do", "", "/a.do"), + Arguments.of("^(.*).do$", "/a/b/c.do", "", "/a/b/c.do"), + Arguments.of("^(.*).do$", "/abcde.do", "", "/abcde.do"), + // Exact (no capture group) + Arguments.of("^/test/info$", "/test/info", "/test/info", null), + // Middle (with one capture group) + Arguments.of("^/rest/([^/]*)/list$", "/rest/api/list", "/rest", "/api/list"), + Arguments.of("^/rest/([^/]*)/list$", "/rest/1.0/list", "/rest", "/1.0/list"), + // Middle (with two capture groups) + Arguments.of("^/t(.*)/i(.*)$", "/test/info", "/test/info", null), + // Prefix (with optional capture group) + Arguments.of("^/test(/.*)?$", "/test/info", "/test", "/info"), + Arguments.of("^/a/(.*)?$", "/a/b/c/d", "/a", "/b/c/d"), + // Prefix (with many capture groups) + Arguments.of("^/test(/i.*)(/c.*)(/d.*)?$", "/test/info/code", "/test/info/code", null), + Arguments.of("^/test(/i.*)(/c.*)(/d.*)?$", "/test/info/code/data", "/test/info/code/data", null), + Arguments.of("^/test(/i.*)(/c.*)(/d.*)?$", "/test/ice/cream/dip", "/test/ice/cream/dip", null), + // Suffix (with only 1 named capture group of id "name") + Arguments.of("^(?\\/.*)/.*\\.do$", "/test/info/code.do", "/test/info", "/code.do"), + Arguments.of("^(?\\/.*)/.*\\.do$", "/a/b/c/d/e/f/g.do", "/a/b/c/d/e/f", "/g.do"), + // Suffix (with only 1 named capture group of id "info") + Arguments.of("^/.*(?\\/.*\\.do)$", "/test/info/code.do", "/test/info", "/code.do"), + Arguments.of("^/.*(?\\/.*\\.do)$", "/a/b/c/d/e/f/g.do", "/a/b/c/d/e/f", "/g.do"), + // Middle (with 2 named capture groups) + // this is pretty much an all glob signature + Arguments.of("^(?\\/.*)(?\\/.*\\.action)$", "/test/info/code.action", "/test/info", "/code.action"), + Arguments.of("^(?\\/.*)(?\\/.*\\.action)$", "/a/b/c/d/e/f/g.action", "/a/b/c/d/e/f", "/g.action"), + // Named groups with gap in the middle + Arguments.of("^(?\\/.*)/x/(?.*\\.action)$", "/a/b/c/x/e/f/g.action", "/a/b/c", "e/f/g.action"), + // Named groups in opposite order + Arguments.of("^(?\\/.*)/x/(?.*\\.action)$", "/a/b/c/x/e/f/g.action", "e/f/g.action", "/a/b/c") + ); + } - spec = new RegexPathSpec("^/[Tt]est(/.*)?$"); - assertTrue(spec.matches("/test/info")); - assertThat(spec.getPathMatch("/test/info"), equalTo("/test/info")); - assertThat(spec.getPathInfo("/test/info"), nullValue()); + @ParameterizedTest(name = "[{index}] RegexPathSpec(\"{0}\").matched(\"{1}\")") + @MethodSource("matchedPathCases") + public void testMatchedPath(String regex, String input, String expectedPathMatch, String expectedPathInfo) + { + RegexPathSpec spec = new RegexPathSpec(regex); + MatchedPath matched = spec.matched(input); + assertEquals(expectedPathMatch, matched.getPathMatch(), + String.format("RegexPathSpec(\"%s\").matched(\"%s\").getPathMatch()", regex, input)); + assertEquals(expectedPathInfo, matched.getPathInfo(), + String.format("RegexPathSpec(\"%s\").matched(\"%s\").getPathInfo()", regex, input)); } @Test @@ -108,6 +156,9 @@ public class RegexPathSpecTest assertNotMatches(spec, "/aa/bb"); assertNotMatches(spec, "/rest/admin/delete"); assertNotMatches(spec, "/rest/list"); + + assertThat(spec.getPathMatch("/rest/1.0/list"), equalTo("/rest/1.0/list")); + assertThat(spec.getPathInfo("/rest/1.0/list"), nullValue()); } @Test @@ -126,6 +177,9 @@ public class RegexPathSpecTest assertNotMatches(spec, "/a"); assertNotMatches(spec, "/aa"); assertNotMatches(spec, "/aa/bb"); + + assertThat(spec.getPathMatch("/a/b/c/d/e"), equalTo("/a")); + assertThat(spec.getPathInfo("/b/c/d/e"), nullValue()); } @Test @@ -147,8 +201,8 @@ public class RegexPathSpecTest assertNotMatches(spec, "/aa/bb"); assertNotMatches(spec, "/aa/bb.do/more"); - assertThat(spec.getPathMatch("/a/b/c.do"), equalTo("/a/b/c.do")); - assertThat(spec.getPathInfo("/a/b/c.do"), nullValue()); + assertThat(spec.getPathMatch("/a/b/c.do"), equalTo("")); + assertThat(spec.getPathInfo("/a/b/c.do"), equalTo("/a/b/c.do")); } /** diff --git a/jetty-servlet/src/test/java/org/eclipse/jetty/servlet/RegexServletTest.java b/jetty-servlet/src/test/java/org/eclipse/jetty/servlet/RegexServletTest.java index bbab83b56a8..9a1ec8d2daf 100644 --- a/jetty-servlet/src/test/java/org/eclipse/jetty/servlet/RegexServletTest.java +++ b/jetty-servlet/src/test/java/org/eclipse/jetty/servlet/RegexServletTest.java @@ -92,10 +92,10 @@ public class RegexServletTest String response = _connector.getResponse("GET /ctx/forward/ignore HTTP/1.0\r\n\r\n"); assertThat(response, containsString(" 200 OK")); assertThat(response, containsString("contextPath='/ctx'")); - assertThat(response, containsString("servletPath='/Test/info'")); - assertThat(response, containsString("pathInfo='null'")); + assertThat(response, containsString("servletPath='/Test'")); + assertThat(response, containsString("pathInfo='/info'")); assertThat(response, containsString("mapping.mappingMatch='null'")); - assertThat(response, containsString("mapping.matchValue='Test/info'")); + assertThat(response, containsString("mapping.matchValue='Test'")); assertThat(response, containsString("mapping.pattern='^/[Tt]est(/.*)?'")); }