From 913d6bc08604253108f4bbd6f79eb6655f7d5c0d Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Wed, 22 Nov 2017 01:07:24 +0000 Subject: [PATCH] use codepoint iterator in a few places git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1815998 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/poi/ss/format/CellFormatPart.java | 74 ++++++++++--------- .../poi/util/StringCodepointsIterable.java | 56 ++++++++++++++ src/java/org/apache/poi/util/StringUtil.java | 7 +- 3 files changed, 98 insertions(+), 39 deletions(-) create mode 100644 src/java/org/apache/poi/util/StringCodepointsIterable.java diff --git a/src/java/org/apache/poi/ss/format/CellFormatPart.java b/src/java/org/apache/poi/ss/format/CellFormatPart.java index 6ddb07efc8..2651edd8c6 100644 --- a/src/java/org/apache/poi/ss/format/CellFormatPart.java +++ b/src/java/org/apache/poi/ss/format/CellFormatPart.java @@ -18,14 +18,14 @@ package org.apache.poi.ss.format; import org.apache.poi.hssf.util.HSSFColor; import org.apache.poi.util.LocaleUtil; +import org.apache.poi.util.StringCodepointsIterable; import org.apache.poi.util.StringUtil; import javax.swing.*; import java.awt.*; -import java.util.Locale; -import java.util.Map; -import java.util.TreeMap; +import java.util.*; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -337,47 +337,47 @@ public class CellFormatPart { boolean seenZero = false; while (m.find()) { String repl = m.group(0); - - if (repl.length() > 0) { - char c1 = repl.charAt(0); - char c2 = 0; - if (repl.length() > 1) - c2 = StringUtil.toLowerCase(repl.charAt(1)).charAt(0); + Iterator<String> codePoints = new StringCodepointsIterable(repl).iterator(); + if (codePoints.hasNext()) { + String c1 = codePoints.next(); + String c2 = null; + if (codePoints.hasNext()) + c2 = codePoints.next().toLowerCase(Locale.ROOT); switch (c1) { - case '@': + case "@": return CellFormatType.TEXT; - case 'd': - case 'D': - case 'y': - case 'Y': + case "d": + case "D": + case "y": + 
case "Y": return CellFormatType.DATE; - case 'h': - case 'H': - case 'm': - case 'M': - case 's': - case 'S': + case "h": + case "H": + case "m": + case "M": + case "s": + case "S": // These can be part of date, or elapsed couldBeDate = true; break; - case '0': + case "0": // This can be part of date, elapsed, or number seenZero = true; break; - case '[': - if (c2 == 'h' || c2 == 'm' || c2 == 's') { + case "[": + if ("h".equals(c2) || "m".equals(c2) || "s".equals(c2)) { return CellFormatType.ELAPSED; } - if (c2 == '$') { + if ("$".equals(c2)) { // Localised currency return CellFormatType.NUMBER; } // Something else inside [] which isn't supported! throw new IllegalArgumentException("Unsupported [] format block '" + repl + "' in '" + fdesc + "' with c2: " + c2); - case '#': - case '?': + case "#": + case "?": return CellFormatType.NUMBER; } } @@ -405,19 +405,20 @@ public class CellFormatPart { */ static String quoteSpecial(String repl, CellFormatType type) { StringBuilder sb = new StringBuilder(); - for (int i = 0; i < repl.length(); i++) { - char ch = repl.charAt(i); - if (ch == '\'' && type.isSpecial('\'')) { + Iterator<String> codePoints = new StringCodepointsIterable(repl).iterator(); + while (codePoints.hasNext()) { + String ch = codePoints.next(); + if ("\'".equals(ch) && type.isSpecial('\'')) { sb.append('\u0000'); continue; } - boolean special = type.isSpecial(ch); + boolean special = type.isSpecial(ch.charAt(0)); if (special) - sb.append("'"); + sb.append("\'"); sb.append(ch); if (special) - sb.append("'"); + sb.append("\'"); } return sb.toString(); } @@ -559,10 +560,11 @@ public class CellFormatPart { * @return The character repeated three times. 
*/ static String expandChar(String part) { - String repl; - char ch = part.charAt(1); - repl = "" + ch + ch + ch; - return repl; + List<String> codePoints = new ArrayList<>(); + new StringCodepointsIterable(part).iterator().forEachRemaining(codePoints::add); + if (codePoints.size() < 2) throw new IllegalArgumentException("Expected part string to have at least 2 chars"); + String ch = codePoints.get(1); + return ch + ch + ch; } /** diff --git a/src/java/org/apache/poi/util/StringCodepointsIterable.java b/src/java/org/apache/poi/util/StringCodepointsIterable.java new file mode 100644 index 0000000000..a56a6ae6c1 --- /dev/null +++ b/src/java/org/apache/poi/util/StringCodepointsIterable.java @@ -0,0 +1,56 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + +package org.apache.poi.util; + +import java.util.Iterator; + +// based on https://gist.github.com/EmmanuelOga/48df70b27ead4d80234b +@Internal +public class StringCodepointsIterable implements Iterable<String> { + private class StringCodepointsIterator implements Iterator<String> { + private int index = 0; + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public boolean hasNext() { + return index < StringCodepointsIterable.this.string.length(); + } + + @Override + public String next() { + int codePoint = StringCodepointsIterable.this.string.codePointAt(index); + index += Character.charCount(codePoint); + return new String(Character.toChars(codePoint)); + } + } + + private final String string; + + public StringCodepointsIterable(final String string) { + this.string = string; + } + + @Override + public Iterator<String> iterator() { + return new StringCodepointsIterator(); + } +} \ No newline at end of file diff --git a/src/java/org/apache/poi/util/StringUtil.java b/src/java/org/apache/poi/util/StringUtil.java index 8ba6be56ff..b45c15f2bd 100644 --- a/src/java/org/apache/poi/util/StringUtil.java +++ b/src/java/org/apache/poi/util/StringUtil.java @@ -20,6 +20,7 @@ package org.apache.poi.util; import java.nio.charset.Charset; import java.util.HashMap; import java.util.Iterator; +import java.util.Locale; import java.util.Map; /** @@ -347,18 +348,18 @@ public class StringUtil { @Internal public static String toLowerCase(char c) { - return Character.toString(c).toLowerCase(LocaleUtil.getUserLocale()); + return Character.toString(c).toLowerCase(Locale.ROOT); } @Internal public static String toUpperCase(char c) { - return Character.toString(c).toUpperCase(LocaleUtil.getUserLocale()); + return Character.toString(c).toUpperCase(Locale.ROOT); } @Internal public static boolean isUpperCase(char c) { String s = Character.toString(c); - return 
s.toUpperCase(LocaleUtil.getUserLocale()).equals(s); + return s.toUpperCase(Locale.ROOT).equals(s); } /**