Improve CharSet testing
bug 22095, from Phil Steitz
Rewrite CharSet parsing, much neater and simpler now

git-svn-id: https://svn.apache.org/repos/asf/jakarta/commons/proper/lang/trunk@137565 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
34f6fa8a78
commit
d43b319902
|
@ -67,8 +67,9 @@ import java.util.Set;
|
||||||
*
|
*
|
||||||
* @author Henri Yandell
|
* @author Henri Yandell
|
||||||
* @author Stephen Colebourne
|
* @author Stephen Colebourne
|
||||||
|
* @author Phil Steitz
|
||||||
* @since 1.0
|
* @since 1.0
|
||||||
* @version $Id: CharSet.java,v 1.10 2003/08/02 18:18:33 scolebourne Exp $
|
* @version $Id: CharSet.java,v 1.11 2003/08/04 00:50:14 scolebourne Exp $
|
||||||
*/
|
*/
|
||||||
public class CharSet implements Serializable {
|
public class CharSet implements Serializable {
|
||||||
|
|
||||||
|
@ -126,10 +127,26 @@ public class CharSet implements Serializable {
|
||||||
* - set containing all the characters from the individual sets</li>
|
* - set containing all the characters from the individual sets</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
|
* <p>The matching order is:</p>
|
||||||
|
* <ol>
|
||||||
|
* <li>Negated multi character range, such as "^a-e"</li>
|
||||||
|
* <li>Ordinary multi character range, such as "a-e"</li>
|
||||||
|
* <li>Negated single character, such as "^a"</li>
|
||||||
|
* <li>Ordinary single character, such as "a"</li>
|
||||||
|
* </ol>
|
||||||
|
* <p>Matching works left to right. Once a match is found the
|
||||||
|
* search starts again from the next character.</p>
|
||||||
|
*
|
||||||
* <p>If the same range is defined twice using the same syntax, only
|
* <p>If the same range is defined twice using the same syntax, only
|
||||||
* one range will be kept.
|
* one range will be kept.
|
||||||
* Thus, "a-ca-c" creates only one range of "a-c".
|
* Thus, "a-ca-c" creates only one range of "a-c".</p>
|
||||||
* However, "a-cabc" creates two ranges as they are defined differently.</p>
|
*
|
||||||
|
* <p>If the start and end of a range are in the wrong order,
|
||||||
|
* they are reversed. Thus "a-e" is the same as "e-a".
|
||||||
|
* As a result, "a-ee-a" would create only one range,
|
||||||
|
* as the "a-e" and "e-a" are the same.</p>
|
||||||
|
*
|
||||||
|
* <p>The set of characters represented is the union of the specified ranges.</p>
|
||||||
*
|
*
|
||||||
* <p>All CharSet objects returned by this method will be immutable.</p>
|
* <p>All CharSet objects returned by this method will be immutable.</p>
|
||||||
*
|
*
|
||||||
|
@ -180,71 +197,26 @@ public class CharSet implements Serializable {
|
||||||
}
|
}
|
||||||
|
|
||||||
int len = str.length();
|
int len = str.length();
|
||||||
switch (len) {
|
int pos = 0;
|
||||||
case 0:
|
while (pos < len) {
|
||||||
// do nothing
|
int remainder = (len - pos);
|
||||||
break;
|
if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
|
||||||
|
// negated range
|
||||||
case 1:
|
set.add(new CharRange(str.charAt(pos + 1), str.charAt(pos + 3), true));
|
||||||
set.add(new CharRange(str.charAt(0)));
|
pos += 4;
|
||||||
break;
|
} else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
|
||||||
|
// range
|
||||||
default:
|
set.add(new CharRange(str.charAt(pos), str.charAt(pos + 2)));
|
||||||
int start = -1;
|
pos += 3;
|
||||||
boolean negated = false;
|
} else if (remainder >= 2 && str.charAt(pos) == '^') {
|
||||||
for (int i = 0; i < len; i++) {
|
// negated char
|
||||||
char ch = str.charAt(i);
|
set.add(new CharRange(str.charAt(pos + 1), true));
|
||||||
if (ch == '-') {
|
pos += 2;
|
||||||
if (start == -1) {
|
} else {
|
||||||
// dash found not as range separator
|
// char
|
||||||
// treat as ordinary start block char
|
set.add(new CharRange(str.charAt(pos)));
|
||||||
start = ch;
|
pos += 1;
|
||||||
} else if (i == len - 1) {
|
|
||||||
// dash is last character, store two single characters
|
|
||||||
set.add(new CharRange((char) start, (char) start, negated));
|
|
||||||
set.add(DASH);
|
|
||||||
start = -1;
|
|
||||||
negated = false;
|
|
||||||
} else {
|
|
||||||
// range block found, store it
|
|
||||||
set.add(new CharRange((char) start, str.charAt(++i), negated));
|
|
||||||
start = -1;
|
|
||||||
negated = false;
|
|
||||||
}
|
|
||||||
} else if (ch == '^') {
|
|
||||||
if (start == -1) {
|
|
||||||
if (negated) {
|
|
||||||
// double negate, treat second as ordinary start block char
|
|
||||||
start = ch;
|
|
||||||
} else {
|
|
||||||
// negate next block
|
|
||||||
negated = true;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// previous block has ended, store it
|
|
||||||
set.add(new CharRange((char) start, (char) start, negated));
|
|
||||||
start = -1;
|
|
||||||
negated = true;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (start == -1) {
|
|
||||||
// start of block
|
|
||||||
start = ch;
|
|
||||||
} else {
|
|
||||||
// previous block has ended, store it, and start next block
|
|
||||||
set.add(new CharRange((char) start, (char) start, negated));
|
|
||||||
start = ch;
|
|
||||||
negated = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// handle leftovers
|
|
||||||
if (start != -1) {
|
|
||||||
set.add(new CharRange((char) start, (char) start, negated));
|
|
||||||
} else if (negated) {
|
|
||||||
set.add(NEGATE);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -62,8 +62,9 @@ package org.apache.commons.lang;
|
||||||
*
|
*
|
||||||
* @author <a href="bayard@generationjava.com">Henri Yandell</a>
|
* @author <a href="bayard@generationjava.com">Henri Yandell</a>
|
||||||
* @author Stephen Colebourne
|
* @author Stephen Colebourne
|
||||||
|
* @author Phil Steitz
|
||||||
* @since 1.0
|
* @since 1.0
|
||||||
* @version $Id: CharSetUtils.java,v 1.20 2003/08/02 18:18:33 scolebourne Exp $
|
* @version $Id: CharSetUtils.java,v 1.21 2003/08/04 00:50:14 scolebourne Exp $
|
||||||
*/
|
*/
|
||||||
public class CharSetUtils {
|
public class CharSetUtils {
|
||||||
|
|
||||||
|
@ -80,13 +81,12 @@ public class CharSetUtils {
|
||||||
// Factory
|
// Factory
|
||||||
//-----------------------------------------------------------------------
|
//-----------------------------------------------------------------------
|
||||||
/**
|
/**
|
||||||
* <p>Creates a <code>CharSetUtils</code> object which allows a certain amount of
|
* <p>Creates a <code>CharSet</code> instance which allows a certain amount of
|
||||||
* set logic to be performed.</p>
|
* set logic to be performed.</p>
|
||||||
* <p>The syntax is:</p>
|
* <p>The syntax is:</p>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>"aeio" which implies 'a','e',..</li>
|
* <li>"aeio" which implies 'a','e',..</li>
|
||||||
* <li>"^e" implies not e. However it only negates, it's not
|
* <li>"^e" implies not e.</li>
|
||||||
* a set in itself due to the size of that set in unicode.</li>
|
|
||||||
* <li>"ej-m" implies e,j->m. e,j,k,l,m.</li>
|
* <li>"ej-m" implies e,j->m. e,j,k,l,m.</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
|
@ -94,6 +94,7 @@ public class CharSetUtils {
|
||||||
* CharSetUtils.evaluateSet(null) = null
|
* CharSetUtils.evaluateSet(null) = null
|
||||||
* CharSetUtils.evaluateSet("") = CharSet matching nothing
|
* CharSetUtils.evaluateSet("") = CharSet matching nothing
|
||||||
* CharSetUtils.evaluateSet("a-e") = CharSet matching a,b,c,d,e
|
* CharSetUtils.evaluateSet("a-e") = CharSet matching a,b,c,d,e
|
||||||
|
* CharSetUtils.evaluateSet("abe-g") = CharSet matching a,b,e,f,g
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
* @param set the set, may be null
|
* @param set the set, may be null
|
||||||
|
@ -109,13 +110,12 @@ public class CharSetUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>Creates a <code>CharSetUtils</code> object which allows a certain amount of
|
* <p>Creates a <code>CharSet</code> instance which allows a certain amount of
|
||||||
* set logic to be performed.</p>
|
* set logic to be performed.</p>
|
||||||
* <p>The syntax is:</p>
|
* <p>The syntax is:</p>
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>"aeio" which implies 'a','e',..</li>
|
* <li>"aeio" which implies 'a','e',..</li>
|
||||||
* <li>"^e" implies not e. However it only negates, it's not
|
* <li>"^e" implies not e.</li>
|
||||||
* a set in itself due to the size of that set in unicode.</li>
|
|
||||||
* <li>"ej-m" implies e,j->m. e,j,k,l,m.</li>
|
* <li>"ej-m" implies e,j->m. e,j,k,l,m.</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
|
|
|
@ -64,7 +64,8 @@ import junit.textui.TestRunner;
|
||||||
* Unit tests {@link org.apache.commons.lang.CharSet}.
|
* Unit tests {@link org.apache.commons.lang.CharSet}.
|
||||||
*
|
*
|
||||||
* @author Stephen Colebourne
|
* @author Stephen Colebourne
|
||||||
* @version $Id: CharSetTest.java,v 1.1 2003/08/02 18:18:33 scolebourne Exp $
|
* @author Phil Steitz
|
||||||
|
* @version $Id: CharSetTest.java,v 1.2 2003/08/04 00:50:14 scolebourne Exp $
|
||||||
*/
|
*/
|
||||||
public class CharSetTest extends TestCase {
|
public class CharSetTest extends TestCase {
|
||||||
|
|
||||||
|
@ -278,59 +279,107 @@ public class CharSetTest extends TestCase {
|
||||||
set = CharSet.getInstance("^");
|
set = CharSet.getInstance("^");
|
||||||
array = set.getCharRanges();
|
array = set.getCharRanges();
|
||||||
assertEquals(1, array.length);
|
assertEquals(1, array.length);
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('^')));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); // "^"
|
||||||
|
|
||||||
set = CharSet.getInstance("^^");
|
set = CharSet.getInstance("^^");
|
||||||
array = set.getCharRanges();
|
array = set.getCharRanges();
|
||||||
assertEquals(1, array.length);
|
assertEquals(1, array.length);
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true)));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^"
|
||||||
|
|
||||||
set = CharSet.getInstance("^^^");
|
set = CharSet.getInstance("^^^");
|
||||||
array = set.getCharRanges();
|
array = set.getCharRanges();
|
||||||
assertEquals(2, array.length);
|
assertEquals(2, array.length);
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true)));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^"
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^')));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^'))); // "^"
|
||||||
|
|
||||||
set = CharSet.getInstance("^^^^");
|
set = CharSet.getInstance("^^^^");
|
||||||
array = set.getCharRanges();
|
array = set.getCharRanges();
|
||||||
assertEquals(1, array.length);
|
assertEquals(1, array.length);
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true)));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^" x2
|
||||||
|
|
||||||
set = CharSet.getInstance("a^");
|
set = CharSet.getInstance("a^");
|
||||||
array = set.getCharRanges();
|
array = set.getCharRanges();
|
||||||
assertEquals(2, array.length);
|
assertEquals(2, array.length);
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('a')));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('a'))); // "a"
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('^')));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('^'))); // "^"
|
||||||
|
|
||||||
set = CharSet.getInstance("^a-");
|
set = CharSet.getInstance("^a-");
|
||||||
array = set.getCharRanges();
|
array = set.getCharRanges();
|
||||||
assertEquals(2, array.length);
|
assertEquals(2, array.length);
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('a', 'a', true)));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('a', 'a', true))); // "^a"
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('-')));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); // "-"
|
||||||
|
|
||||||
set = CharSet.getInstance("^^-c");
|
set = CharSet.getInstance("^^-c");
|
||||||
array = set.getCharRanges();
|
array = set.getCharRanges();
|
||||||
assertEquals(1, array.length);
|
assertEquals(1, array.length);
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', 'c', true)));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', 'c', true))); // "^^-c"
|
||||||
|
|
||||||
set = CharSet.getInstance("^c-^");
|
set = CharSet.getInstance("^c-^");
|
||||||
array = set.getCharRanges();
|
array = set.getCharRanges();
|
||||||
assertEquals(1, array.length);
|
assertEquals(1, array.length);
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true)));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); // "^c-^"
|
||||||
|
|
||||||
set = CharSet.getInstance("^c-^d");
|
set = CharSet.getInstance("^c-^d");
|
||||||
array = set.getCharRanges();
|
array = set.getCharRanges();
|
||||||
assertEquals(2, array.length);
|
assertEquals(2, array.length);
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true)));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('c', '^', true))); // "^c-^"
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('d')));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('d'))); // "d"
|
||||||
|
|
||||||
set = CharSet.getInstance("^^-");
|
set = CharSet.getInstance("^^-");
|
||||||
array = set.getCharRanges();
|
array = set.getCharRanges();
|
||||||
assertEquals(2, array.length);
|
assertEquals(2, array.length);
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true)));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('^', '^', true))); // "^^"
|
||||||
assertEquals(true, ArrayUtils.contains(array, new CharRange('-')));
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('-'))); // "-"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testConstructor_String_oddCombinations() {
|
||||||
|
CharSet set;
|
||||||
|
CharRange[] array = null;
|
||||||
|
|
||||||
|
set = CharSet.getInstance("a-^c");
|
||||||
|
array = set.getCharRanges();
|
||||||
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('a', '^'))); // "a-^"
|
||||||
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('c'))); // "c"
|
||||||
|
assertEquals(false, set.contains('b'));
|
||||||
|
assertEquals(true, set.contains('^'));
|
||||||
|
assertEquals(true, set.contains('_')); // between ^ and a
|
||||||
|
assertEquals(true, set.contains('c'));
|
||||||
|
|
||||||
|
set = CharSet.getInstance("^a-^c");
|
||||||
|
array = set.getCharRanges();
|
||||||
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('a', '^', true))); // "^a-^"
|
||||||
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('c'))); // "c"
|
||||||
|
assertEquals(true, set.contains('b'));
|
||||||
|
assertEquals(false, set.contains('^'));
|
||||||
|
assertEquals(false, set.contains('_')); // between ^ and a
|
||||||
|
|
||||||
|
set = CharSet.getInstance("a- ^-- "); //contains everything
|
||||||
|
array = set.getCharRanges();
|
||||||
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('a', ' '))); // "a- "
|
||||||
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('-', ' ', true))); // "^-- "
|
||||||
|
assertEquals(true, set.contains('#'));
|
||||||
|
assertEquals(true, set.contains('^'));
|
||||||
|
assertEquals(true, set.contains('a'));
|
||||||
|
assertEquals(true, set.contains('*'));
|
||||||
|
assertEquals(true, set.contains('A'));
|
||||||
|
|
||||||
|
set = CharSet.getInstance("^-b");
|
||||||
|
array = set.getCharRanges();
|
||||||
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('^','b'))); // "^-b"
|
||||||
|
assertEquals(true, set.contains('b'));
|
||||||
|
assertEquals(true, set.contains('_')); // between ^ and a
|
||||||
|
assertEquals(false, set.contains('A'));
|
||||||
|
assertEquals(true, set.contains('^'));
|
||||||
|
|
||||||
|
set = CharSet.getInstance("b-^");
|
||||||
|
array = set.getCharRanges();
|
||||||
|
assertEquals(true, ArrayUtils.contains(array, new CharRange('^','b'))); // "b-^"
|
||||||
|
assertEquals(true, set.contains('b'));
|
||||||
|
assertEquals(true, set.contains('^'));
|
||||||
|
assertEquals(true, set.contains('a')); // between ^ and b
|
||||||
|
assertEquals(false, set.contains('c'));
|
||||||
|
}
|
||||||
|
|
||||||
//-----------------------------------------------------------------------
|
//-----------------------------------------------------------------------
|
||||||
public void testEquals_Object() {
|
public void testEquals_Object() {
|
||||||
CharSet abc = CharSet.getInstance("abc");
|
CharSet abc = CharSet.getInstance("abc");
|
||||||
|
@ -377,6 +426,7 @@ public class CharSetTest extends TestCase {
|
||||||
//-----------------------------------------------------------------------
|
//-----------------------------------------------------------------------
|
||||||
public void testContains_Char() {
|
public void testContains_Char() {
|
||||||
CharSet btod = CharSet.getInstance("b-d");
|
CharSet btod = CharSet.getInstance("b-d");
|
||||||
|
CharSet dtob = CharSet.getInstance("d-b");
|
||||||
CharSet bcd = CharSet.getInstance("bcd");
|
CharSet bcd = CharSet.getInstance("bcd");
|
||||||
CharSet bd = CharSet.getInstance("bd");
|
CharSet bd = CharSet.getInstance("bd");
|
||||||
CharSet notbtod = CharSet.getInstance("^b-d");
|
CharSet notbtod = CharSet.getInstance("^b-d");
|
||||||
|
@ -404,6 +454,16 @@ public class CharSetTest extends TestCase {
|
||||||
assertEquals(false, notbtod.contains('c'));
|
assertEquals(false, notbtod.contains('c'));
|
||||||
assertEquals(false, notbtod.contains('d'));
|
assertEquals(false, notbtod.contains('d'));
|
||||||
assertEquals(true, notbtod.contains('e'));
|
assertEquals(true, notbtod.contains('e'));
|
||||||
|
|
||||||
|
assertEquals(false, dtob.contains('a'));
|
||||||
|
assertEquals(true, dtob.contains('b'));
|
||||||
|
assertEquals(true, dtob.contains('c'));
|
||||||
|
assertEquals(true, dtob.contains('d'));
|
||||||
|
assertEquals(false, dtob.contains('e'));
|
||||||
|
|
||||||
|
CharRange[] array = dtob.getCharRanges();
|
||||||
|
assertEquals("[b-d]", dtob.toString());
|
||||||
|
assertEquals(1, array.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
//-----------------------------------------------------------------------
|
//-----------------------------------------------------------------------
|
||||||
|
|
Loading…
Reference in New Issue