Function PROPER: Don't use a regular expression syntax that is not available on Android. In fact, using a regular expression in this case was overkill, slow, and even incorrect in corner cases; the function can easily be implemented without regex and runs a bit faster anyway (the micro-benchmark is down from 4s to 2s).

Also added a unit test to ensure the behavior stays the same (except in cases where it needed fixing, e.g. toUpperCase() can produce more than one character for some inputs, which was handled incorrectly).

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1752779 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2016-07-15 06:32:45 +00:00
parent dee56fc6c0
commit 726dc7a578
2 changed files with 79 additions and 24 deletions

View File

@ -17,18 +17,11 @@
package org.apache.poi.ss.formula.functions;
import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.poi.ss.formula.eval.BoolEval;
import org.apache.poi.ss.formula.eval.ErrorEval;
import org.apache.poi.ss.formula.eval.EvaluationException;
import org.apache.poi.ss.formula.eval.NumberEval;
import org.apache.poi.ss.formula.eval.OperandResolver;
import org.apache.poi.ss.formula.eval.StringEval;
import org.apache.poi.ss.formula.eval.ValueEval;
import org.apache.poi.ss.formula.eval.*;
import org.apache.poi.ss.usermodel.DataFormatter;
import java.util.Locale;
/**
* @author Amol S. Deshmukh < amolweb at ya hoo dot com >
* @author Josh Micich
@ -36,18 +29,17 @@ import org.apache.poi.ss.usermodel.DataFormatter;
*/
public abstract class TextFunction implements Function {
protected static final DataFormatter formatter = new DataFormatter();
protected static final String EMPTY_STRING = "";
protected static final String evaluateStringArg(ValueEval eval, int srcRow, int srcCol) throws EvaluationException {
protected static String evaluateStringArg(ValueEval eval, int srcRow, int srcCol) throws EvaluationException {
    // Reduce the operand to a single value for the given source row/column, then coerce it to text.
    return OperandResolver.coerceValueToString(OperandResolver.getSingleValue(eval, srcRow, srcCol));
}
protected static final int evaluateIntArg(ValueEval arg, int srcCellRow, int srcCellCol) throws EvaluationException {
protected static int evaluateIntArg(ValueEval arg, int srcCellRow, int srcCellCol) throws EvaluationException {
    // Reduce the operand to a single value for the given source row/column, then coerce it to an int.
    return OperandResolver.coerceValueToInt(OperandResolver.getSingleValue(arg, srcCellRow, srcCellCol));
}
protected static final double evaluateDoubleArg(ValueEval arg, int srcCellRow, int srcCellCol) throws EvaluationException {
protected static double evaluateDoubleArg(ValueEval arg, int srcCellRow, int srcCellCol) throws EvaluationException {
    // Reduce the operand to a single value for the given source row/column, then coerce it to a double.
    return OperandResolver.coerceValueToDouble(OperandResolver.getSingleValue(arg, srcCellRow, srcCellCol));
}
@ -122,21 +114,23 @@ public abstract class TextFunction implements Function {
* making the first letter upper and the rest lower case.
*/
public static final Function PROPER = new SingleArgTextFunc() {
final Pattern nonAlphabeticPattern = Pattern.compile("\\P{IsL}");
protected ValueEval evaluate(String text) {
StringBuilder sb = new StringBuilder();
boolean shouldMakeUppercase = true;
final String lowercaseText = text.toLowerCase(Locale.ROOT);
final String uppercaseText = text.toUpperCase(Locale.ROOT);
final int length = text.length();
for(int i = 0; i < length; ++i) {
final char ch = text.charAt(i);
// Note: we are using String.toUpperCase() here on purpose as it handles certain things
// better than Character.toUpperCase(), e.g. German "scharfes s" is translated
// to "SS" (i.e. two characters), if uppercased properly!
if (shouldMakeUppercase) {
sb.append(uppercaseText.charAt(i));
sb.append(String.valueOf(ch).toUpperCase(Locale.ROOT));
}
else {
sb.append(lowercaseText.charAt(i));
sb.append(String.valueOf(ch).toLowerCase(Locale.ROOT));
}
shouldMakeUppercase = nonAlphabeticPattern.matcher(text.subSequence(i, i + 1)).matches();
shouldMakeUppercase = !Character.isLetter(ch);
}
return new StringEval(sb.toString());
}
@ -184,8 +178,7 @@ public abstract class TextFunction implements Function {
* @return whether the character is printable
*/
private boolean isPrintable(char c){
int charCode = c;
return charCode >= 32;
return (int) c >= 32;
}
};
@ -274,9 +267,9 @@ public abstract class TextFunction implements Function {
public ValueEval evaluate(ValueEval[] args, int srcRowIndex, int srcColumnIndex) {
StringBuilder sb = new StringBuilder();
for (int i=0, iSize=args.length; i<iSize; i++) {
for (ValueEval arg : args) {
try {
sb.append(evaluateStringArg(args[i], srcRowIndex, srcColumnIndex));
sb.append(evaluateStringArg(arg, srcRowIndex, srcColumnIndex));
} catch (EvaluationException e) {
return e.getErrorEval();
}

View File

@ -0,0 +1,62 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.ss.formula.functions;
import org.apache.poi.ss.formula.eval.StringEval;
import org.apache.poi.ss.formula.eval.ValueEval;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
/**
 * Tests for {@link TextFunction#PROPER}, which makes the first letter of each
 * word upper case and the remaining letters lower case.
 */
public class TestProper {

    /**
     * Checks PROPER against a table of inputs: empty and single-character strings,
     * various non-letter word separators, digits, and German characters whose
     * upper-case form differs in length from the lower-case form.
     */
    @Test
    public void test() {
        checkProper("", "");
        checkProper("a", "A");
        checkProper("abc", "Abc");
        checkProper("abc abc", "Abc Abc");
        checkProper("abc/abc", "Abc/Abc");
        checkProper("ABC/ABC", "Abc/Abc");
        checkProper("aBc/ABC", "Abc/Abc");
        checkProper("aBc@#$%^&*()_+=-ABC", "Abc@#$%^&*()_+=-Abc");
        checkProper("aBc25aerg/ABC", "Abc25Aerg/Abc");
        // German umlauts: a first letter that is already upper case is left unchanged
        checkProper("aBc/\u00C4\u00F6\u00DF\u00FC/ABC", "Abc/\u00C4\u00F6\u00DF\u00FC/Abc");
        checkProper("\u00FC", "\u00DC");
        checkProper("\u00DC", "\u00DC");
        // German "scharfes s" is uppercased to "SS", i.e. one character becomes two
        checkProper("\u00DF", "SS");
        // German umlauts: a lower-case first letter is changed to upper case
        checkProper("aBc/\u00FC\u00C4\u00F6\u00DF\u00FC/ABC", "Abc/\u00DC\u00E4\u00F6\u00DF\u00FC/Abc");
    }

    /**
     * Evaluates PROPER repeatedly on a longer text and prints the elapsed time.
     * The time is only reported, never asserted, so a slow machine cannot fail the test.
     */
    @Test
    public void testMicroBenchmark() {
        ValueEval strArg = new StringEval("some longer text that needs a number of replacements to check for runtime of different implementations");
        long start = System.currentTimeMillis();
        for (int i = 0; i < 300000; i++) {
            assertEquals("Some Longer Text That Needs A Number Of Replacements To Check For Runtime Of Different Implementations",
                    evaluateProper(strArg));
        }
        // Took approx. 600ms on a decent laptop in July 2016
        System.out.println("Took: " + (System.currentTimeMillis() - start) + "ms");
    }

    /**
     * Asserts that PROPER turns {@code input} into {@code expected}.
     * The failure message names the input so a failing table entry can be identified.
     *
     * @param input    the text passed to PROPER
     * @param expected the text PROPER is expected to return
     */
    private void checkProper(String input, String expected) {
        assertEquals("PROPER(\"" + input + "\")", expected, evaluateProper(new StringEval(input)));
    }

    /**
     * Evaluates PROPER on a single argument and unwraps the string result.
     *
     * @param strArg the argument handed to PROPER
     * @return the string value of the result
     * @throws AssertionError if the result is not a {@link StringEval}
     */
    private static String evaluateProper(ValueEval strArg) {
        final ValueEval ret = TextFunction.PROPER.evaluate(new ValueEval[]{strArg}, 0, 0);
        // Fail with a readable message rather than a ClassCastException if PROPER
        // returns something other than a string (e.g. an error value).
        // The message is only built on failure, which keeps the benchmark loop cheap.
        if (!(ret instanceof StringEval)) {
            throw new AssertionError("PROPER did not return a StringEval but: " + ret);
        }
        return ((StringEval) ret).getStringValue();
    }
}