diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java index bb8fc26281c..0874cde7101 100644 --- a/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java +++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java @@ -365,8 +365,43 @@ import java.util.Set; */ public class RegExp { - enum Kind { - REGEXP_UNION, REGEXP_CONCATENATION, REGEXP_INTERSECTION, REGEXP_OPTIONAL, REGEXP_REPEAT, REGEXP_REPEAT_MIN, REGEXP_REPEAT_MINMAX, REGEXP_COMPLEMENT, REGEXP_CHAR, REGEXP_CHAR_RANGE, REGEXP_ANYCHAR, REGEXP_EMPTY, REGEXP_STRING, REGEXP_ANYSTRING, REGEXP_AUTOMATON, REGEXP_INTERVAL, + /** + * The type of expression represented by a RegExp node. + */ + public enum Kind { + /** The union of two expressions */ + REGEXP_UNION, + /** A sequence of two expressions */ + REGEXP_CONCATENATION, + /** The intersection of two expressions */ + REGEXP_INTERSECTION, + /** An optional expression */ + REGEXP_OPTIONAL, + /** An expression that repeats */ + REGEXP_REPEAT, + /** An expression that repeats a minimum number of times*/ + REGEXP_REPEAT_MIN, + /** An expression that repeats a minimum and maximum number of times*/ + REGEXP_REPEAT_MINMAX, + /** The complement of an expression */ + REGEXP_COMPLEMENT, + /** A Character */ + REGEXP_CHAR, + /** A Character range*/ + REGEXP_CHAR_RANGE, + /** Any Character allowed*/ + REGEXP_ANYCHAR, + /** An empty expression*/ + REGEXP_EMPTY, + /** A string expression*/ + REGEXP_STRING, + /** Any string allowed */ + REGEXP_ANYSTRING, + /** An Automaton expression*/ + REGEXP_AUTOMATON, + /** An Interval expression */ + REGEXP_INTERVAL, + /** An expression for a pre-defined class e.g. \w */ REGEXP_PRE_CLASS } @@ -411,21 +446,37 @@ public class RegExp { */ public static final int NONE = 0x0000; + //Immutable parsed state + /** + * The type of expression + */ + public final Kind kind; + /** + * Child expressions held by a container type expression + */ + public final RegExp exp1, exp2; + /** + * String expression + */ + public final String s; + /** + * Character expression + */ + public final int c; + /** + * Limits for repeatable type expressions + */ + public final int min, max, digits; + /** + * Extents for range type expressions + */ + public final int from, to; + + // Parser variables private final String originalString; - Kind kind; - RegExp exp1, exp2; - String s; - int c; - int min, max, digits; - int from, to; - int flags; int pos; - - RegExp() { - this.originalString = null; - } - + /** * Constructs new RegExp from a string. Same as * RegExp(s, ALL). @@ -468,6 +519,37 @@ public class RegExp { from = e.from; to = e.to; } + + RegExp(Kind kind, RegExp exp1, RegExp exp2, String s, int c, int min, int max, int digits, int from, int to){ + this.originalString = null; + this.kind = kind; + this.flags = 0; + this.exp1 = exp1; + this.exp2 = exp2; + this.s = s; + this.c = c; + this.min = min; + this.max = max; + this.digits = digits; + this.from = from; + this.to = to; + } + + // Simplified construction of container nodes + static RegExp newContainerNode(Kind kind, RegExp exp1, RegExp exp2) { + return new RegExp(kind, exp1, exp2, null, 0, 0, 0, 0, 0, 0); + } + + // Simplified construction of repeating nodes + static RegExp newRepeatingNode(Kind kind, RegExp exp, int min, int max) { + return new RegExp(kind, exp, null, null, 0, min, max, 0, 0, 0); + } + + + // Simplified construction of leaf nodes + static RegExp newLeafNode(Kind kind, String s, int c, int min, int max, int digits, int from, int to) { + return new RegExp(kind, null, null, s, c, min, max, digits, from, to); + } /** * Constructs new Automaton from this RegExp. Same @@ -919,34 +1001,29 @@ public class RegExp { } static RegExp makeUnion(RegExp exp1, RegExp exp2) { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_UNION; - r.exp1 = exp1; - r.exp2 = exp2; - return r; + return newContainerNode(Kind.REGEXP_UNION, exp1, exp2); } static RegExp makeConcatenation(RegExp exp1, RegExp exp2) { if ((exp1.kind == Kind.REGEXP_CHAR || exp1.kind == Kind.REGEXP_STRING) && (exp2.kind == Kind.REGEXP_CHAR || exp2.kind == Kind.REGEXP_STRING)) return makeString( exp1, exp2); - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_CONCATENATION; + RegExp rexp1, rexp2; if (exp1.kind == Kind.REGEXP_CONCATENATION && (exp1.exp2.kind == Kind.REGEXP_CHAR || exp1.exp2.kind == Kind.REGEXP_STRING) && (exp2.kind == Kind.REGEXP_CHAR || exp2.kind == Kind.REGEXP_STRING)) { - r.exp1 = exp1.exp1; - r.exp2 = makeString(exp1.exp2, exp2); + rexp1 = exp1.exp1; + rexp2 = makeString(exp1.exp2, exp2); } else if ((exp1.kind == Kind.REGEXP_CHAR || exp1.kind == Kind.REGEXP_STRING) && exp2.kind == Kind.REGEXP_CONCATENATION && (exp2.exp1.kind == Kind.REGEXP_CHAR || exp2.exp1.kind == Kind.REGEXP_STRING)) { - r.exp1 = makeString(exp1, exp2.exp1); - r.exp2 = exp2.exp2; + rexp1 = makeString(exp1, exp2.exp1); + rexp2 = exp2.exp2; } else { - r.exp1 = exp1; - r.exp2 = exp2; + rexp1 = exp1; + rexp2 = exp2; } - return r; + return newContainerNode(Kind.REGEXP_CONCATENATION, rexp1, rexp2); } static private RegExp makeString(RegExp exp1, RegExp exp2) { @@ -959,107 +1036,61 @@ public class RegExp { } static RegExp makeIntersection(RegExp exp1, RegExp exp2) { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_INTERSECTION; - r.exp1 = exp1; - r.exp2 = exp2; - return r; + return newContainerNode(Kind.REGEXP_INTERSECTION, exp1, exp2); } static RegExp makeOptional(RegExp exp) { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_OPTIONAL; - r.exp1 = exp; - return r; + return newContainerNode(Kind.REGEXP_OPTIONAL, exp, null); } static RegExp makeRepeat(RegExp exp) { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_REPEAT; - r.exp1 = exp; - return r; + return newContainerNode(Kind.REGEXP_REPEAT, exp, null); } static RegExp makeRepeat(RegExp exp, int min) { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_REPEAT_MIN; - r.exp1 = exp; - r.min = min; - return r; + return newRepeatingNode(Kind.REGEXP_REPEAT_MIN, exp, min, 0); } static RegExp makeRepeat(RegExp exp, int min, int max) { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_REPEAT_MINMAX; - r.exp1 = exp; - r.min = min; - r.max = max; - return r; + return newRepeatingNode(Kind.REGEXP_REPEAT_MINMAX, exp, min, max); } static RegExp makeComplement(RegExp exp) { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_COMPLEMENT; - r.exp1 = exp; - return r; + return newContainerNode(Kind.REGEXP_COMPLEMENT, exp, null); } static RegExp makeChar(int c) { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_CHAR; - r.c = c; - return r; + return newLeafNode(Kind.REGEXP_CHAR, null, c, 0, 0, 0, 0, 0); } static RegExp makeCharRange(int from, int to) { if (from > to) throw new IllegalArgumentException("invalid range: from (" + from + ") cannot be > to (" + to + ")"); - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_CHAR_RANGE; - r.from = from; - r.to = to; - return r; + return newLeafNode(Kind.REGEXP_CHAR_RANGE, null, 0, 0, 0, 0, from, to); } static RegExp makeAnyChar() { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_ANYCHAR; - return r; + return newContainerNode(Kind.REGEXP_ANYCHAR, null, null); } static RegExp makeEmpty() { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_EMPTY; - return r; + return newContainerNode(Kind.REGEXP_EMPTY, null, null); } static RegExp makeString(String s) { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_STRING; - r.s = s; - return r; + return newLeafNode(Kind.REGEXP_STRING, s, 0, 0, 0, 0, 0, 0); } static RegExp makeAnyString() { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_ANYSTRING; - return r; + return newContainerNode(Kind.REGEXP_ANYSTRING, null, null); } static RegExp makeAutomaton(String s) { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_AUTOMATON; - r.s = s; - return r; + return newLeafNode(Kind.REGEXP_AUTOMATON, s, 0, 0, 0, 0, 0, 0); } static RegExp makeInterval(int min, int max, int digits) { - RegExp r = new RegExp(); - r.kind = Kind.REGEXP_INTERVAL; - r.min = min; - r.max = max; - r.digits = digits; - return r; + return newLeafNode(Kind.REGEXP_INTERVAL, null, 0, min, max, digits, 0, 0); } private boolean peek(String s) { @@ -1201,10 +1232,7 @@ public class RegExp { //See https://docs.oracle.com/javase/tutorial/essential/regex/pre_char_classes.html if (match('\\')) { if (peek("dDwWsS")) { - RegExp re =new RegExp(); - re.kind = Kind.REGEXP_PRE_CLASS; - re.from = next(); - return re; + return newLeafNode(Kind.REGEXP_PRE_CLASS, null, 0, 0, 0, 0, next(), 0); } if (peek("\\")) {