Adding an OctalUnescaper to handle Java's support of 1->377 Octal values. LANG-646
git-svn-id: https://svn.apache.org/repos/asf/commons/proper/lang/trunk@1059753 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2270d830fd
commit
345b4836f6
|
@ -24,6 +24,7 @@ import org.apache.commons.lang3.text.translate.CharSequenceTranslator;
|
|||
import org.apache.commons.lang3.text.translate.EntityArrays;
|
||||
import org.apache.commons.lang3.text.translate.LookupTranslator;
|
||||
import org.apache.commons.lang3.text.translate.NumericEntityUnescaper;
|
||||
import org.apache.commons.lang3.text.translate.OctalUnescaper;
|
||||
import org.apache.commons.lang3.text.translate.UnicodeEscaper;
|
||||
import org.apache.commons.lang3.text.translate.UnicodeUnescaper;
|
||||
|
||||
|
@ -128,6 +129,7 @@ public class StringEscapeUtils {
|
|||
// TODO: throw "illegal character: \92" as an Exception if a \ is at the end of the Java input (as the compiler does)?
|
||||
public static final CharSequenceTranslator UNESCAPE_JAVA =
|
||||
new AggregateTranslator(
|
||||
new OctalUnescaper(), // .between('\1', '\377'),
|
||||
new UnicodeUnescaper(),
|
||||
new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE()),
|
||||
new LookupTranslator(
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.commons.lang3.text.translate;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
|
||||
/**
|
||||
* Translate escaped octal Strings back to their octal values.
|
||||
*
|
||||
* For example, "\45" should go back to being the specific value (a %).
|
||||
*
|
||||
* Note that this currently only supports the viable range of octal for Java; namely
|
||||
* 1 to 377. This is both because parsing Java is the main use case and Integer.parseInt
|
||||
* throws an exception when values are larger than octal 377.
|
||||
*
|
||||
* @author Apache Software Foundation
|
||||
* @since 3.0
|
||||
* @version $Id: OctalUnescaper.java 967237 2010-07-23 20:08:57Z mbenson $
|
||||
*/
|
||||
public class OctalUnescaper extends CharSequenceTranslator {
|
||||
|
||||
private static int OCTAL_MAX = 377;
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public int translate(CharSequence input, int index, Writer out) throws IOException {
|
||||
if(input.charAt(index) == '\\' && index < (input.length() - 1) && Character.isDigit(input.charAt(index + 1)) ) {
|
||||
int start = index + 1;
|
||||
|
||||
int end = index + 2;
|
||||
while ( end < input.length() && Character.isDigit(input.charAt(end)) ) {
|
||||
end++;
|
||||
if ( Integer.parseInt(input.subSequence(start, end).toString(), 10) > OCTAL_MAX) {
|
||||
end--; // rollback
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
out.write( Integer.parseInt(input.subSequence(start, end).toString(), 8) );
|
||||
return 1 + end - start;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.commons.lang3.text.translate;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Unit tests for {@link org.apache.commons.lang3.text.translate.OctalUnescaper}.
|
||||
* @version $Id: OctalUnescaperTest.java 979392 2010-07-26 18:09:52Z mbenson $
|
||||
*/
|
||||
public class OctalUnescaperTest extends TestCase {
|
||||
|
||||
public void testBetween() {
|
||||
OctalUnescaper oue = new OctalUnescaper(); //.between("1", "377");
|
||||
|
||||
String input = "\\45";
|
||||
String result = oue.translate(input);
|
||||
assertEquals("Failed to unescape octal characters via the between method", "\45", result);
|
||||
|
||||
input = "\\377";
|
||||
result = oue.translate(input);
|
||||
assertEquals("Failed to unescape octal characters via the between method", "\377", result);
|
||||
|
||||
input = "\\377 and";
|
||||
result = oue.translate(input);
|
||||
assertEquals("Failed to unescape octal characters via the between method", "\377 and", result);
|
||||
|
||||
input = "\\378 and";
|
||||
result = oue.translate(input);
|
||||
assertEquals("Failed to unescape octal characters via the between method", "\378 and", result);
|
||||
|
||||
input = "\\378";
|
||||
result = oue.translate(input);
|
||||
assertEquals("Failed to unescape octal characters via the between method", "\378", result);
|
||||
|
||||
input = "\\1";
|
||||
result = oue.translate(input);
|
||||
assertEquals("Failed to unescape octal characters via the between method", "\1", result);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue