mirror of https://github.com/apache/lucene.git
LUCENE-2568: fix AutomatonTestUtil.RandomAcceptedString to not return invalid (containing code points in the UTF16 surrogates range) UTF32
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@979639 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c76e15a33d
commit
96a6898ce7
|
@ -77,12 +77,44 @@ public class AutomatonTestUtil {
|
|||
return new String(buffer, 0, end);
|
||||
}
|
||||
|
||||
// picks a random int code point that this transition
|
||||
// accepts, avoiding the surrogates range since they are
|
||||
// "defined" in UTF32. Don't call this on a transition
|
||||
// that only accepts UTF16 surrogate values!!
|
||||
// picks a random int code point, avoiding surrogates;
|
||||
// throws IllegalArgumentException if this transition only
|
||||
// accepts surrogates
|
||||
private static int getRandomCodePoint(final Random r, final Transition t) {
|
||||
return t.min+r.nextInt(t.max-t.min+1);
|
||||
final int code;
|
||||
if (t.max < UnicodeUtil.UNI_SUR_HIGH_START ||
|
||||
t.min > UnicodeUtil.UNI_SUR_HIGH_END) {
|
||||
// easy: entire range is before or after surrogates
|
||||
code = t.min+r.nextInt(t.max-t.min+1);
|
||||
} else if (t.min >= UnicodeUtil.UNI_SUR_HIGH_START) {
|
||||
if (t.max > UnicodeUtil.UNI_SUR_LOW_END) {
|
||||
// after surrogates
|
||||
code = 1+UnicodeUtil.UNI_SUR_LOW_END+r.nextInt(t.max-UnicodeUtil.UNI_SUR_LOW_END+1);
|
||||
} else {
|
||||
throw new IllegalArgumentException("transition accepts only surrogates: " + t);
|
||||
}
|
||||
} else if (t.max <= UnicodeUtil.UNI_SUR_LOW_END) {
|
||||
if (t.min < UnicodeUtil.UNI_SUR_HIGH_START) {
|
||||
// before surrogates
|
||||
code = t.min + r.nextInt(UnicodeUtil.UNI_SUR_HIGH_START - t.min);
|
||||
} else {
|
||||
throw new IllegalArgumentException("transition accepts only surrogates: " + t);
|
||||
}
|
||||
} else {
|
||||
// range includes all surrogates
|
||||
int gap1 = UnicodeUtil.UNI_SUR_HIGH_START - t.min;
|
||||
int gap2 = t.max - UnicodeUtil.UNI_SUR_LOW_END;
|
||||
int c = r.nextInt(gap1+gap2);
|
||||
if (c < gap1) {
|
||||
code = t.min + c;
|
||||
} else {
|
||||
code = UnicodeUtil.UNI_SUR_LOW_END + c - gap1 + 1;
|
||||
}
|
||||
}
|
||||
|
||||
assert code >= t.min && code <= t.max && (code < UnicodeUtil.UNI_SUR_HIGH_START || code > UnicodeUtil.UNI_SUR_LOW_END):
|
||||
"code=" + code + " min=" + t.min + " max=" + t.max;
|
||||
return code;
|
||||
}
|
||||
|
||||
public static class RandomAcceptedStrings {
|
||||
|
@ -206,7 +238,6 @@ public class AutomatonTestUtil {
|
|||
} else {
|
||||
t = s.transitionsArray[r.nextInt(s.numTransitions)];
|
||||
}
|
||||
|
||||
soFar.add(getRandomCodePoint(r, t));
|
||||
s = t.to;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue