diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java index fbe37a4b0b..2b1114ab46 100644 --- a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java @@ -31,9 +31,11 @@ import java.util.Set; import org.apache.poi.EncryptedDocumentException; import org.apache.poi.extractor.POIOLE2TextExtractor; import org.apache.poi.extractor.POITextExtractor; +import org.apache.poi.hssf.extractor.EventBasedExcelExtractor; import org.apache.poi.ooxml.extractor.ExtractorFactory; import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.ss.extractor.ExcelExtractor; import org.apache.poi.util.IOUtils; import org.apache.xmlbeans.XmlException; @@ -83,6 +85,7 @@ public abstract class AbstractFileHandler implements FileHandler { long modified = file.lastModified(); POITextExtractor extractor = null; + String fileAndParentName = file.getParentFile().getName() + "/" + file.getName(); try { extractor = ExtractorFactory.createExtractor(file); assertNotNull("Should get a POITextExtractor but had none for file " + file, extractor); @@ -95,7 +98,7 @@ public abstract class AbstractFileHandler implements FileHandler { assertNotNull(metadataExtractor.getText()); assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!", - EXPECTED_EXTRACTOR_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())); + EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName)); assertEquals("File should not be modified by extractor", length, file.length()); assertEquals("File should not be modified by extractor", modified, file.lastModified()); @@ -111,8 +114,24 @@ public abstract class AbstractFileHandler implements FileHandler { assertNotNull(text); } } + + // test again with including formulas and cell-comments as this caused some bugs + if(extractor instanceof ExcelExtractor && + // comment-extraction and formula extraction are not well supported in event based extraction + !(extractor instanceof EventBasedExcelExtractor)) { + ((ExcelExtractor)extractor).setFormulasNotResults(true); + + String text = extractor.getText(); + assertNotNull(text); + // */ + + ((ExcelExtractor) extractor).setIncludeCellComments(true); + + text = extractor.getText(); + assertNotNull(text); + } } catch (IllegalArgumentException e) { - if(!EXPECTED_EXTRACTOR_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())) { + if(!EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName)) { throw e; } } catch (EncryptedDocumentException e) { diff --git a/src/java/org/apache/poi/ss/formula/function/FunctionDataBuilder.java b/src/java/org/apache/poi/ss/formula/function/FunctionDataBuilder.java index c535bf4ca2..0be53f555e 100644 --- a/src/java/org/apache/poi/ss/formula/function/FunctionDataBuilder.java +++ b/src/java/org/apache/poi/ss/formula/function/FunctionDataBuilder.java @@ -81,8 +81,7 @@ final class FunctionDataBuilder { FunctionMetadata[] jumbledArray = new FunctionMetadata[_functionDataByName.size()]; _functionDataByName.values().toArray(jumbledArray); FunctionMetadata[] fdIndexArray = new FunctionMetadata[_maxFunctionIndex+1]; - for (int i = 0; i < jumbledArray.length; i++) { - FunctionMetadata fd = jumbledArray[i]; + for (FunctionMetadata fd : jumbledArray) { fdIndexArray[fd.getIndex()] = fd; } diff --git a/src/java/org/apache/poi/ss/formula/function/FunctionMetadata.java b/src/java/org/apache/poi/ss/formula/function/FunctionMetadata.java index 6ab06f0ad8..fa8ca34aca 100644 --- a/src/java/org/apache/poi/ss/formula/function/FunctionMetadata.java +++ b/src/java/org/apache/poi/ss/formula/function/FunctionMetadata.java @@ -31,6 +31,7 @@ public final class FunctionMetadata { * to make that file more version neutral. * @see org.apache.poi.ss.formula.FormulaParser#validateNumArgs(int, FunctionMetadata) */ + @SuppressWarnings("JavadocReference") private static final short FUNCTION_MAX_PARAMS = 30; private final int _index; @@ -49,27 +50,35 @@ public final class FunctionMetadata { _returnClassCode = returnClassCode; _parameterClassCodes = (parameterClassCodes == null) ? null : parameterClassCodes.clone(); } + public int getIndex() { return _index; } + public String getName() { return _name; } + public int getMinParams() { return _minParams; } + public int getMaxParams() { return _maxParams; } + public boolean hasFixedArgsLength() { return _minParams == _maxParams; } + public byte getReturnClassCode() { return _returnClassCode; } + public byte[] getParameterClassCodes() { return _parameterClassCodes.clone(); } + /** * Some varags functions (like VLOOKUP) have a specific limit to the number of arguments that * can be passed. Other functions (like SUM) don't have such a limit. For those functions, @@ -80,11 +89,8 @@ public final class FunctionMetadata { public boolean hasUnlimitedVarags() { return FUNCTION_MAX_PARAMS == _maxParams; } + public String toString() { - StringBuffer sb = new StringBuffer(64); - sb.append(getClass().getName()).append(" ["); - sb.append(_index).append(" ").append(_name); - sb.append("]"); - return sb.toString(); + return getClass().getName() + " [" + _index + " " + _name + "]"; } } diff --git a/src/java/org/apache/poi/ss/formula/function/FunctionMetadataReader.java b/src/java/org/apache/poi/ss/formula/function/FunctionMetadataReader.java index dc6a8c0255..dc83e9c601 100644 --- a/src/java/org/apache/poi/ss/formula/function/FunctionMetadataReader.java +++ b/src/java/org/apache/poi/ss/formula/function/FunctionMetadataReader.java @@ -41,6 +41,7 @@ final class FunctionMetadataReader { private static final int MAX_RECORD_LENGTH = 100_000; private static final String METADATA_FILE_NAME = "functionMetadata.txt"; + private static final String METADATA_FILE_NAME_CETAB = "functionMetadataCetab.txt"; /** plain ASCII text metadata file uses three dots for ellipsis */ private static final String ELLIPSIS = "..."; @@ -52,51 +53,56 @@ final class FunctionMetadataReader { private static final String[] DIGIT_ENDING_FUNCTION_NAMES = { // Digits at the end of a function might be due to a left-over footnote marker. // except in these cases - "LOG10", "ATAN2", "DAYS360", "SUMXMY2", "SUMX2MY2", "SUMX2PY2", + "LOG10", "ATAN2", "DAYS360", "SUMXMY2", "SUMX2MY2", "SUMX2PY2", "A1.R1C1", }; private static final Set DIGIT_ENDING_FUNCTION_NAMES_SET = new HashSet<>(Arrays.asList(DIGIT_ENDING_FUNCTION_NAMES)); public static FunctionMetadataRegistry createRegistry() { - try { - InputStream is = FunctionMetadataReader.class.getResourceAsStream(METADATA_FILE_NAME); - if (is == null) { - throw new RuntimeException("resource '" + METADATA_FILE_NAME + "' not found"); - } - - try { - try(BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { - FunctionDataBuilder fdb = new FunctionDataBuilder(400); - - while (true) { - String line = br.readLine(); - if (line == null) { - break; - } - if (line.length() < 1 || line.charAt(0) == '#') { - continue; - } - String trimLine = line.trim(); - if (trimLine.length() < 1) { - continue; - } - processLine(fdb, line); - } + FunctionDataBuilder fdb = new FunctionDataBuilder(800); + readResourceFile(fdb, METADATA_FILE_NAME); + return fdb.build(); + } - return fdb.build(); - } - } finally { - is.close(); - } - } catch (IOException e) { - throw new RuntimeException(e); - } + public static FunctionMetadataRegistry createRegistryCetab() { + FunctionDataBuilder fdb = new FunctionDataBuilder(800); + readResourceFile(fdb, METADATA_FILE_NAME_CETAB); + return fdb.build(); + } + + private static void readResourceFile(FunctionDataBuilder fdb, String resourceFile) { + try (InputStream is = FunctionMetadataReader.class.getResourceAsStream(resourceFile)) { + if (is == null) { + throw new RuntimeException("resource '" + resourceFile + "' not found"); + } + + try(BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { + + while (true) { + String line = br.readLine(); + if (line == null) { + break; + } + if (line.length() < 1 || line.charAt(0) == '#') { + continue; + } + String trimLine = line.trim(); + if (trimLine.length() < 1) { + continue; + } + processLine(fdb, line); + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } } private static void processLine(FunctionDataBuilder fdb, String line) { String[] parts = TAB_DELIM_PATTERN.split(line, -2); if(parts.length != 8) { - throw new RuntimeException("Bad line format '" + line + "' - expected 8 data fields"); + throw new RuntimeException("Bad line format '" + line + "' - expected 8 data fields delimited by tab, " + + "but had " + parts.length + ": " + Arrays.toString(parts)); } int functionIndex = parseInt(parts[0]); String functionName = parts[1]; diff --git a/src/java/org/apache/poi/ss/formula/function/FunctionMetadataRegistry.java b/src/java/org/apache/poi/ss/formula/function/FunctionMetadataRegistry.java index 34317fe6d8..bb01b41163 100644 --- a/src/java/org/apache/poi/ss/formula/function/FunctionMetadataRegistry.java +++ b/src/java/org/apache/poi/ss/formula/function/FunctionMetadataRegistry.java @@ -37,6 +37,7 @@ public final class FunctionMetadataRegistry { public static final short FUNCTION_INDEX_EXTERNAL = 255; private static FunctionMetadataRegistry _instance; + private static FunctionMetadataRegistry _instanceCetab; private final FunctionMetadata[] _functionDataByIndex; private final Map _functionDataByName; @@ -48,6 +49,13 @@ public final class FunctionMetadataRegistry { return _instance; } + private static FunctionMetadataRegistry getInstanceCetab() { + if (_instanceCetab == null) { + _instanceCetab = FunctionMetadataReader.createRegistryCetab(); + } + return _instanceCetab; + } + /* package */ FunctionMetadataRegistry(FunctionMetadata[] functionDataByIndex, Map functionDataByName) { _functionDataByIndex = (functionDataByIndex == null) ? null : functionDataByIndex.clone(); _functionDataByName = functionDataByName; @@ -62,6 +70,10 @@ public final class FunctionMetadataRegistry { return getInstance().getFunctionByIndexInternal(index); } + public static FunctionMetadata getCetabFunctionByIndex(int index) { + return getInstanceCetab().getFunctionByIndexInternal(index); + } + private FunctionMetadata getFunctionByIndexInternal(int index) { return _functionDataByIndex[index]; } @@ -74,7 +86,11 @@ public final class FunctionMetadataRegistry { public static short lookupIndexByName(String name) { FunctionMetadata fd = getInstance().getFunctionByNameInternal(name); if (fd == null) { - return -1; + // also try the cetab functions + fd = getInstanceCetab().getFunctionByNameInternal(name); + if (fd == null) { + return -1; + } } return (short) fd.getIndex(); } @@ -83,8 +99,12 @@ public final class FunctionMetadataRegistry { return _functionDataByName.get(name); } - public static FunctionMetadata getFunctionByName(String name) { - return getInstance().getFunctionByNameInternal(name); + FunctionMetadata fm = getInstance().getFunctionByNameInternal(name); + if(fm == null) { + return getInstanceCetab().getFunctionByNameInternal(name); + } + + return fm; } } diff --git a/src/java/org/apache/poi/ss/formula/ptg/AbstractFunctionPtg.java b/src/java/org/apache/poi/ss/formula/ptg/AbstractFunctionPtg.java index 28b5e7d9e5..bfb7aeba6a 100644 --- a/src/java/org/apache/poi/ss/formula/ptg/AbstractFunctionPtg.java +++ b/src/java/org/apache/poi/ss/formula/ptg/AbstractFunctionPtg.java @@ -123,13 +123,22 @@ public abstract class AbstractFunctionPtg extends OperationPtg { return ix >= 0; } - protected final String lookupName(short index) { + protected String lookupName(short index) { + return lookupName(index, false); + } + + protected final String lookupName(short index, boolean isCetab) { if(index == FunctionMetadataRegistry.FUNCTION_INDEX_EXTERNAL) { return "#external#"; } - FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByIndex(index); + final FunctionMetadata fm; + if(isCetab) { + fm = FunctionMetadataRegistry.getCetabFunctionByIndex(index); + } else { + fm = FunctionMetadataRegistry.getFunctionByIndex(index); + } if(fm == null) { - throw new RuntimeException("bad function index (" + index + ")"); + throw new RuntimeException("bad function index (" + index + ", " + isCetab + ")"); } return fm.getName(); } diff --git a/src/java/org/apache/poi/ss/formula/ptg/ExpPtg.java b/src/java/org/apache/poi/ss/formula/ptg/ExpPtg.java index d128be9951..83ebc2044d 100644 --- a/src/java/org/apache/poi/ss/formula/ptg/ExpPtg.java +++ b/src/java/org/apache/poi/ss/formula/ptg/ExpPtg.java @@ -69,9 +69,6 @@ public final class ExpPtg extends ControlPtg { @Override public String toString() { - StringBuffer buffer = new StringBuffer("[Array Formula or Shared Formula]\n"); - buffer.append("row = ").append(getRow()).append("\n"); - buffer.append("col = ").append(getColumn()).append("\n"); - return buffer.toString(); + return "[Array Formula or Shared Formula]\n" + "row = " + getRow() + "\n" + "col = " + getColumn() + "\n"; } } diff --git a/src/java/org/apache/poi/ss/formula/ptg/FuncVarPtg.java b/src/java/org/apache/poi/ss/formula/ptg/FuncVarPtg.java index 3392d04a76..a1cacd8ba5 100644 --- a/src/java/org/apache/poi/ss/formula/ptg/FuncVarPtg.java +++ b/src/java/org/apache/poi/ss/formula/ptg/FuncVarPtg.java @@ -18,6 +18,8 @@ package org.apache.poi.ss.formula.ptg; import org.apache.poi.ss.formula.function.FunctionMetadata; import org.apache.poi.ss.formula.function.FunctionMetadataRegistry; +import org.apache.poi.util.BitField; +import org.apache.poi.util.BitFieldFactory; import org.apache.poi.util.LittleEndianInput; import org.apache.poi.util.LittleEndianOutput; @@ -25,24 +27,29 @@ import org.apache.poi.util.LittleEndianOutput; * @author Jason Height (jheight at chariot dot net dot au) */ public final class FuncVarPtg extends AbstractFunctionPtg{ - public final static byte sid = 0x22; private final static int SIZE = 4; + // See spec at 2.5.198.63 PtgFuncVar + private static final BitField ceFunc = BitFieldFactory.getInstance(0xF000); + /** * Single instance of this token for 'sum() taking a single argument' */ public static final OperationPtg SUM = FuncVarPtg.create("SUM", 1); - private FuncVarPtg(int functionIndex, int returnClass, byte[] paramClasses, int numArgs) { + private final boolean _isCetab; + + private FuncVarPtg(int functionIndex, int returnClass, byte[] paramClasses, int numArgs, boolean isCetab) { super(functionIndex, returnClass, paramClasses, numArgs); + _isCetab = isCetab; } /**Creates new function pointer from a byte array * usually called while reading an excel file. */ public static FuncVarPtg create(LittleEndianInput in) { - return create(in.readByte(), in.readShort()); + return create(in.readByte(), in.readUShort()); } /** @@ -53,12 +60,25 @@ public final class FuncVarPtg extends AbstractFunctionPtg{ } private static FuncVarPtg create(int numArgs, int functionIndex) { - FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByIndex(functionIndex); - if(fm == null) { - // Happens only as a result of a call to FormulaParser.parse(), with a non-built-in function name - return new FuncVarPtg(functionIndex, Ptg.CLASS_VALUE, new byte[] {Ptg.CLASS_VALUE}, numArgs); + final FunctionMetadata fm; + boolean isCetab = ceFunc.isSet(functionIndex); + if(isCetab) { + functionIndex = ceFunc.clear(functionIndex); + fm = FunctionMetadataRegistry.getCetabFunctionByIndex(functionIndex); + } else { + fm = FunctionMetadataRegistry.getFunctionByIndex(functionIndex); } - return new FuncVarPtg(functionIndex, fm.getReturnClassCode(), fm.getParameterClassCodes(), numArgs); + + if (fm == null) { + // Happens only as a result of a call to FormulaParser.parse(), with a non-built-in function name + return new FuncVarPtg(functionIndex, Ptg.CLASS_VALUE, new byte[]{Ptg.CLASS_VALUE}, numArgs, isCetab); + } + return new FuncVarPtg(functionIndex, fm.getReturnClassCode(), fm.getParameterClassCodes(), numArgs, isCetab); + } + + @Override + protected String lookupName(short index) { + return lookupName(index, _isCetab); } public void write(LittleEndianOutput out) { diff --git a/src/resources/main/org/apache/poi/ss/formula/function/functionMetadata.txt b/src/resources/main/org/apache/poi/ss/formula/function/functionMetadata.txt index f9e91a1e3b..e3c8fc4d74 100644 --- a/src/resources/main/org/apache/poi/ss/formula/function/functionMetadata.txt +++ b/src/resources/main/org/apache/poi/ss/formula/function/functionMetadata.txt @@ -74,6 +74,8 @@ 50 TREND 1 3 A R R R x 51 LOGEST 1 2 A R R x 52 GROWTH 1 3 A R R R x +53 GOTO 1 1 R R +55 RETURN 1 1 V V 56 PV 3 5 V V V V V V # Built-In Sheet Functions in BIFF2 57 FV 3 5 V V V V V V @@ -98,8 +100,13 @@ 76 ROWS 1 1 V A 77 COLUMNS 1 1 V A 78 OFFSET 3 5 R R V V V V x +79 ABSREF 2 2 R V R +80 RELREF 2 2 R V V +81 ARGUMENT 0 3 V V V R 82 SEARCH 2 3 V V V V 83 TRANSPOSE 1 1 A A +84 ERROR 0 2 V V R +85 STEP 0 0 86 TYPE 1 1 V V 97 ATAN2 2 2 V V V 98 ASIN 1 1 V V @@ -109,6 +116,7 @@ 102 VLOOKUP 3 3 V V R R x 105 ISREF 1 1 V R 109 LOG 1 2 V V V +110 EXEC 1 4 V V V V V 111 CHAR 1 1 V V 112 LOWER 1 1 V V 113 UPPER 1 1 V V @@ -134,6 +142,7 @@ 143 SYD 4 4 V V V V V 144 DDB 4 5 V V V V V V 148 INDIRECT 1 2 R V V x +150 CALL 1 3 V V R R 162 CLEAN 1 1 V V 163 MDETERM 1 1 V A 164 MINVERSE 1 1 A A @@ -143,6 +152,10 @@ 169 COUNTA 0 30 V R 183 PRODUCT 0 30 V R 184 FACT 1 1 V V +185 GET.CELL 1 2 V V R +186 GET.WORKSPACE 1 1 V V +187 GET.WINDOW 1 2 V V V +188 GET.DOCUMENT 1 2 V V V 189 DPRODUCT 3 3 V R R R 190 ISNONTEXT 1 1 V V 193 STDEVP 1 30 V R @@ -174,6 +187,7 @@ 220 DAYS360 2 2 V V V x 221 TODAY 0 0 V - x 222 VDB 5 7 V V V V V V V V +225 END.IF 0 0 227 MEDIAN 1 30 V R ... 228 SUMPRODUCT 1 30 V A ... 229 SINH 1 1 V V @@ -187,11 +201,20 @@ # New Built-In Sheet Functions in BIFF4 14 FIXED 1 3 V V V V x 204 USDOLLAR 1 2 V V V x +238 LAST.ERROR 0 0 215 DBCS 1 1 V V x 216 RANK 2 3 V V R V 247 DB 4 5 V V V V V V 252 FREQUENCY 2 2 A R R +257 EVALUATE 1 1 V V 261 ERROR.TYPE 1 1 V V +262 APP.TITLE 0 1 V V +263 WINDOW.TITLE 0 1 V V +264 SAVE.TOOLBAR 0 2 V V V +265 ENABLE.TOOL 3 3 V V V V +266 PRESS.TOOL 3 3 V V V V +267 REGISTER.ID 2 3 V V V V +268 GET.WORKBOOK 1 2 V V V 269 AVEDEV 1 30 V R ... 270 BETADIST 3 5 V V V V V V 271 GAMMALN 1 1 V V @@ -272,13 +295,13 @@ 350 ISPMT 4 4 V V V V V 351 DATEDIF 3 3 V V V V 352 DATESTRING 1 1 V V -353 NUMBERSTRING 2 2 V V V -354 ROMAN 1 2 V V V -# New Built-In Sheet Functions in BIFF8 -358 GETPIVOTDATA 2 30 V V R ... -359 HYPERLINK 1 2 V V V -360 PHONETIC 1 1 V R -361 AVERAGEA 1 30 V R ... +353 NUMBERSTRING 2 2 V V V +354 ROMAN 1 2 V V V +# New Built-In Sheet Functions in BIFF8 +358 GETPIVOTDATA 2 30 V V R ... +359 HYPERLINK 1 2 V V V +360 PHONETIC 1 1 V R +361 AVERAGEA 1 30 V R ... 362 MAXA 1 30 V R ... 363 MINA 1 30 V R ... 364 STDEVPA 1 30 V R ... diff --git a/src/resources/main/org/apache/poi/ss/formula/function/functionMetadataCetab.txt b/src/resources/main/org/apache/poi/ss/formula/function/functionMetadataCetab.txt new file mode 100644 index 0000000000..b0cf329429 --- /dev/null +++ b/src/resources/main/org/apache/poi/ss/formula/function/functionMetadataCetab.txt @@ -0,0 +1,417 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Created by (org.apache.poi.ss.formula.function.ExcelCetabFunctionExtractor) +# from source file 'org/apache/poi/ss/formula/function/functionMetadataCetab-PDF.txt' +# +#Columns: (index, name, minParams, maxParams, returnClass, paramClasses, isVolatile, hasFootnote ) + +# +0 BEEP 0 0 +1 OPEN 0 0 +2 OPEN.LINKS 0 0 +3 CLOSE.ALL 0 0 +4 SAVE 0 0 +5 SAVE.AS 0 0 +6 FILE.DELETE 0 0 +7 PAGE.SETUP 0 0 +8 PRINT 0 0 +9 PRINTER.SETUP 0 0 +10 QUIT 0 0 +11 NEW.WINDOW 0 0 +12 ARRANGE.ALL 0 0 +13 WINDOW.SIZE 0 0 +14 WINDOW.MOVE 0 0 +15 FULL 0 0 +16 CLOSE 0 0 +17 RUN 0 0 +22 SET.PRINT.AREA 0 0 +23 SET.PRINT.TITLES 0 0 +24 SET.PAGE.BREAK 0 0 +25 REMOVE.PAGE.BREAK 0 0 +26 FONT 0 0 +27 DISPLAY 0 0 +28 PROTECT.DOCUMENT 0 0 +29 PRECISION 0 0 +30 A1.R1C1 0 0 +31 CALCULATE.NOW 0 0 +32 CALCULATION 0 0 +34 DATA.FIND 0 0 +35 EXTRACT 0 0 +36 DATA.DELETE 0 0 +37 SET.DATABASE 0 0 +38 SET.CRITERIA 0 0 +39 SORT 0 0 +40 DATA.SERIES 0 0 +41 TABLE 0 0 +42 FORMAT.NUMBER 0 0 +43 ALIGNMENT 0 0 +44 STYLE 0 0 +45 BORDER 0 0 +46 CELL.PROTECTION 0 0 +47 COLUMN.WIDTH 0 0 +48 UNDO 0 0 +49 CUT 0 0 +50 COPY 0 0 +51 PASTE 0 0 +52 CLEAR 0 0 +53 PASTE.SPECIAL 0 0 +54 EDIT.DELETE 0 0 +55 INSERT 0 0 +56 FILL.RIGHT 0 0 +57 FILL.DOWN 0 0 +61 DEFINE.NAME 0 0 +62 CREATE.NAMES 0 0 +63 FORMULA.GOTO 0 0 +64 FORMULA.FIND 0 0 +65 SELECT.LAST.CELL 0 0 +66 SHOW.ACTIVE.CELL 0 0 +67 GALLERY.AREA 0 0 +68 GALLERY.BAR 0 0 +69 GALLERY.COLUMN 0 0 +70 GALLERY.LINE 0 0 +71 GALLERY.PIE 0 0 +72 GALLERY.SCATTER 0 0 +73 COMBINATION 0 0 +74 PREFERRED 0 0 +75 ADD.OVERLAY 0 0 +76 GRIDLINES 0 0 +77 SET.PREFERRED 0 0 +78 AXES 0 0 +79 LEGEND 0 0 +80 ATTACH.TEXT 0 0 +81 ADD.ARROW 0 0 +82 SELECT.CHART 0 0 +83 SELECT.PLOT.AREA 0 0 +84 PATTERNS 0 0 +85 MAIN.CHART 0 0 +86 OVERLAY 0 0 +87 SCALE 0 0 +88 FORMAT.LEGEND 0 0 +89 FORMAT.TEXT 0 0 +90 EDIT.REPEAT 0 0 +91 PARSE 0 0 +92 JUSTIFY 0 0 +93 HIDE 0 0 +94 UNHIDE 0 0 +95 WORKSPACE 0 0 +96 FORMULA 0 0 +97 FORMULA.FILL 0 0 +98 FORMULA.ARRAY 0 0 +99 DATA.FIND.NEXT 0 0 +100 DATA.FIND.PREV 0 0 +101 FORMULA.FIND.NEXT 0 0 +102 FORMULA.FIND.PREV 0 0 +103 ACTIVATE 0 0 +104 ACTIVATE.NEXT 0 0 +105 ACTIVATE.PREV 0 0 +106 UNLOCKED.NEXT 0 0 +107 UNLOCKED.PREV 0 0 +108 COPY.PICTURE 0 0 +109 SELECT 0 0 +110 DELETE.NAME 0 0 +111 DELETE.FORMAT 0 0 +112 VLINE 0 0 +113 HLINE 0 0 +114 VPAGE 0 0 +115 HPAGE 0 0 +116 VSCROLL 0 0 +117 HSCROLL 0 0 +118 ALERT 0 0 +119 NEW 0 0 +120 CANCEL.COPY 0 0 +121 SHOW.CLIPBOARD 0 0 +122 MESSAGE 0 0 +124 PASTE.LINK 0 0 +125 APP.ACTIVATE 0 0 +126 DELETE.ARROW 0 0 +127 ROW.HEIGHT 0 0 +128 FORMAT.MOVE 0 0 +129 FORMAT.SIZE 0 0 +130 FORMULA.REPLACE 0 0 +131 SEND.KEYS 0 0 +132 SELECT.SPECIAL 0 0 +133 APPLY.NAMES 0 0 +134 REPLACE.FONT 0 0 +135 FREEZE.PANES 0 0 +136 SHOW.INFO 0 0 +137 SPLIT 0 0 +138 ON.WINDOW 0 0 +139 ON.DATA 0 0 +140 DISABLE.INPUT 0 0 +142 OUTLINE 0 0 +143 LIST.NAMES 0 0 +144 FILE.CLOSE 0 0 +145 SAVE.WORKBOOK 0 0 +146 DATA.FORM 0 0 +147 COPY.CHART 0 0 +148 ON.TIME 0 0 +149 WAIT 0 0 +150 FORMAT.FONT 0 0 +151 FILL.UP 0 0 +152 FILL.LEFT 0 0 +153 DELETE.OVERLAY 0 0 +155 SHORT.MENUS 0 0 +159 SET.UPDATE.STATUS 0 0 +161 COLOR.PALETTE 0 0 +162 DELETE.STYLE 0 0 +163 WINDOW.RESTORE 0 0 +164 WINDOW.MAXIMIZE 0 0 +166 CHANGE.LINK 0 0 +167 CALCULATE.DOCUMENT 0 0 +168 ON.KEY 0 0 +169 APP.RESTORE 0 0 +170 APP.MOVE 0 0 +171 APP.SIZE 0 0 +172 APP.MINIMIZE 0 0 +173 APP.MAXIMIZE 0 0 +174 BRING.TO.FRONT 0 0 +175 SEND.TO.BACK 0 0 +185 MAIN.CHART.TYPE 0 0 +186 OVERLAY.CHART.TYPE 0 0 +187 SELECT.END 0 0 +188 OPEN.MAIL 0 0 +189 SEND.MAIL 0 0 +190 STANDARD.FONT 0 0 +191 CONSOLIDATE 0 0 +192 SORT.SPECIAL 0 0 +193 GALLERY.3D.AREA 0 0 +194 GALLERY.3D.COLUMN 0 0 +195 GALLERY.3D.LINE 0 0 +196 GALLERY.3D.PIE 0 0 +197 VIEW.3D 0 0 +198 GOAL.SEEK 0 0 +199 WORKGROUP 0 0 +200 FILL.GROUP 0 0 +201 UPDATE.LINK 0 0 +202 PROMOTE 0 0 +203 DEMOTE 0 0 +204 SHOW.DETAIL 0 0 +206 UNGROUP 0 0 +207 OBJECT.PROPERTIES 0 0 +208 SAVE.NEW.OBJECT 0 0 +209 SHARE 0 0 +210 SHARE.NAME 0 0 +211 DUPLICATE 0 0 +212 APPLY.STYLE 0 0 +213 ASSIGN.TO.OBJECT 0 0 +214 OBJECT.PROTECTION 0 0 +215 HIDE.OBJECT 0 0 +216 SET.EXTRACT 0 0 +217 CREATE.PUBLISHER 0 0 +218 SUBSCRIBE.TO 0 0 +219 ATTRIBUTES 0 0 +220 SHOW.TOOLBAR 0 0 +222 PRINT.PREVIEW 0 0 +223 EDIT.COLOR 0 0 +224 SHOW.LEVELS 0 0 +225 FORMAT.MAIN 0 0 +226 FORMAT.OVERLAY 0 0 +227 ON.RECALC 0 0 +228 EDIT.SERIES 0 0 +229 DEFINE.STYLE 0 0 +240 LINE.PRINT 0 0 +243 ENTER.DATA 0 0 +249 GALLERY.RADAR 0 0 +250 MERGE.STYLES 0 0 +251 EDITION.OPTIONS 0 0 +252 PASTE.PICTURE 0 0 +253 PASTE.PICTURE.LINK 0 0 +254 SPELLING 0 0 +256 ZOOM 0 0 +259 INSERT.OBJECT 0 0 +260 WINDOW.MINIMIZE 0 0 +265 SOUND.NOTE 0 0 +266 SOUND.PLAY 0 0 +267 FORMAT.SHAPE 0 0 +268 EXTEND.POLYGON 0 0 +269 FORMAT.AUTO 0 0 +272 GALLERY.3D.BAR 0 0 +273 GALLERY.3D.SURFACE 0 0 +274 FILL.AUTO 0 0 +276 CUSTOMIZE.TOOLBAR 0 0 +277 ADD.TOOL 0 0 +278 EDIT.OBJECT 0 0 +279 ON.DOUBLECLICK 0 0 +280 ON.ENTRY 0 0 +281 WORKBOOK.ADD 0 0 +282 WORKBOOK.MOVE 0 0 +283 WORKBOOK.COPY 0 0 +284 WORKBOOK.OPTIONS 0 0 +285 SAVE.WORKSPACE 0 0 +288 CHART.WIZARD 0 0 +289 DELETE.TOOL 0 0 +290 MOVE.TOOL 0 0 +291 WORKBOOK.SELECT 0 0 +292 WORKBOOK.ACTIVATE 0 0 +293 ASSIGN.TO.TOOL 0 0 +295 COPY.TOOL 0 0 +296 RESET.TOOL 0 0 +297 CONSTRAIN.NUMERIC 0 0 +298 PASTE.TOOL 0 0 +302 WORKBOOK.NEW 0 0 +305 SCENARIO.CELLS 0 0 +306 SCENARIO.DELETE 0 0 +307 SCENARIO.ADD 0 0 +308 SCENARIO.EDIT 0 0 +309 SCENARIO.SHOW 0 0 +310 SCENARIO.SHOW.NEXT 0 0 +311 SCENARIO.SUMMARY 0 0 +312 PIVOT.TABLE.WIZARD 0 0 +313 PIVOT.FIELD.PROPERTIES 0 0 +314 PIVOT.FIELD 0 0 +315 PIVOT.ITEM 0 0 +316 PIVOT.ADD.FIELDS 0 0 +318 OPTIONS.CALCULATION 0 0 +319 OPTIONS.EDIT 0 0 +320 OPTIONS.VIEW 0 0 +321 ADDIN.MANAGER 0 0 +322 MENU.EDITOR 0 0 +323 ATTACH.TOOLBARS 0 0 +324 VBAActivate 0 0 +325 OPTIONS.CHART 0 0 +328 VBA.INSERT.FILE 0 0 +330 VBA.PROCEDURE.DEFINITION 0 0 +336 ROUTING.SLIP 0 0 +338 ROUTE.DOCUMENT 0 0 +339 MAIL.LOGON 0 0 +342 INSERT.PICTURE 0 0 +343 EDIT.TOOL 0 0 +344 GALLERY.DOUGHNUT 0 0 +350 CHART.TREND 0 0 +352 PIVOT.ITEM.PROPERTIES 0 0 +354 WORKBOOK.INSERT 0 0 +355 OPTIONS.TRANSITION 0 0 +356 OPTIONS.GENERAL 0 0 +370 FILTER.ADVANCED 0 0 +373 MAIL.ADD.MAILER 0 0 +374 MAIL.DELETE.MAILER 0 0 +375 MAIL.REPLY 0 0 +376 MAIL.REPLY.ALL 0 0 +377 MAIL.FORWARD 0 0 +378 MAIL.NEXT.LETTER 0 0 +379 DATA.LABEL 0 0 +380 INSERT.TITLE 0 0 +381 FONT.PROPERTIES 0 0 +382 MACRO.OPTIONS 0 0 +383 WORKBOOK.HIDE 0 0 +384 WORKBOOK.UNHIDE 0 0 +385 WORKBOOK.DELETE 0 0 +386 WORKBOOK.NAME 0 0 +388 GALLERY.CUSTOM 0 0 +390 ADD.CHART.AUTOFORMAT 0 0 +391 DELETE.CHART.AUTOFORMAT 0 0 +392 CHART.ADD.DATA 0 0 +393 AUTO.OUTLINE 0 0 +394 TAB.ORDER 0 0 +395 SHOW.DIALOG 0 0 +396 SELECT.ALL 0 0 +397 UNGROUP.SHEETS 0 0 +398 SUBTOTAL.CREATE 0 0 +399 SUBTOTAL.REMOVE 0 0 +400 RENAME.OBJECT 0 0 +412 WORKBOOK.SCROLL 0 0 +413 WORKBOOK.NEXT 0 0 +414 WORKBOOK.PREV 0 0 +415 WORKBOOK.TAB.SPLIT 0 0 +416 FULL.SCREEN 0 0 +417 WORKBOOK.PROTECT 0 0 +420 SCROLLBAR.PROPERTIES 0 0 +421 PIVOT.SHOW.PAGES 0 0 +422 TEXT.TO.COLUMNS 0 0 +423 FORMAT.CHARTTYPE 0 0 +424 LINK.FORMAT 0 0 +425 TRACER.DISPLAY 0 0 +430 TRACER.NAVIGATE 0 0 +431 TRACER.CLEAR 0 0 +432 TRACER.ERROR 0 0 +433 PIVOT.FIELD.GROUP 0 0 +434 PIVOT.FIELD.UNGROUP 0 0 +435 CHECKBOX.PROPERTIES 0 0 +436 LABEL.PROPERTIES 0 0 +437 LISTBOX.PROPERTIES 0 0 +438 EDITBOX.PROPERTIES 0 0 +439 PIVOT.REFRESH 0 0 +440 LINK.COMBO 0 0 +441 OPEN.TEXT 0 0 +442 HIDE.DIALOG 0 0 +443 SET.DIALOG.FOCUS 0 0 +444 ENABLE.OBJECT 0 0 +445 PUSHBUTTON.PROPERTIES 0 0 +446 SET.DIALOG.DEFAULT 0 0 +447 FILTER 0 0 +448 FILTER.SHOW.ALL 0 0 +449 CLEAR.OUTLINE 0 0 +450 FUNCTION.WIZARD 0 0 +451 ADD.LIST.ITEM 0 0 +452 SET.LIST.ITEM 0 0 +453 REMOVE.LIST.ITEM 0 0 +454 SELECT.LIST.ITEM 0 0 +455 SET.CONTROL.VALUE 0 0 +456 SAVE.COPY.AS 0 0 +458 OPTIONS.LISTS.ADD 0 0 +459 OPTIONS.LISTS.DELETE 0 0 +460 SERIES.AXES 0 0 +461 SERIES.X 0 0 +462 SERIES.Y 0 0 +463 ERRORBAR.X 0 0 +464 ERRORBAR.Y 0 0 +465 FORMAT.CHART 0 0 +466 SERIES.ORDER 0 0 +467 MAIL.LOGOFF 0 0 +468 CLEAR.ROUTING.SLIP 0 0 +469 APP.ACTIVATE.MICROSOFT 0 0 +470 MAIL.EDIT.MAILER 0 0 +471 ON.SHEET 0 0 +472 STANDARD.WIDTH 0 0 +473 SCENARIO.MERGE 0 0 +474 SUMMARY.INFO 0 0 +475 FIND.FILE 0 0 +476 ACTIVE.CELL.FONT 0 0 +477 ENABLE.TIPWIZARD 0 0 +478 VBA.MAKE.ADDIN 0 0 +480 INSERTDATATABLE 0 0 +481 WORKGROUP.OPTIONS 0 0 +482 MAIL.SEND.MAILER 0 0 +485 AUTOCORRECT 0 0 +489 POST.DOCUMENT 0 0 +491 PICKLIST 0 0 +493 VIEW.SHOW 0 0 +494 VIEW.DEFINE 0 0 +495 VIEW.DELETE 0 0 +509 SHEET.BACKGROUND 0 0 +510 INSERT.MAP.OBJECT 0 0 +511 OPTIONS.MENONO 0 0 +517 MSOCHECKS 0 0 +518 NORMAL 0 0 +519 LAYOUT 0 0 +520 RM.PRINT.AREA 0 0 +521 CLEAR.PRINT.AREA 0 0 +522 ADD.PRINT.AREA 0 0 +523 MOVE.BRK 0 0 +545 HIDECURR.NOTE 0 0 +546 HIDEALL.NOTES 0 0 +547 DELETE.NOTE 0 0 +548 TRAVERSE.NOTES 0 0 +549 ACTIVATE.NOTES 0 0 +620 PROTECT.REVISIONS 0 0 +621 UNPROTECT.REVISIONS 0 0 +647 OPTIONS.ME 0 0 +653 WEB.PUBLISH 0 0 +667 NEWWEBQUERY 0 0 +673 PIVOT.TABLE.CHART 0 0 +753 OPTIONS.SAVE 0 0 +755 OPTIONS.SPELL 0 0 +808 HIDEALL.INKANNOTS 0 0 diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index fd9c49dc12..ce4f2b7be8 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -34,7 +34,6 @@ import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.util.LocaleUtil; -import org.junit.After; import org.junit.Test; /** @@ -130,50 +129,42 @@ public final class TestExcelExtractor { public void testEventExtractor() throws Exception { // First up, a simple file with string // based formulas in it - EventBasedExcelExtractor extractor1 = null; - try { - extractor1 = new EventBasedExcelExtractor( - new POIFSFileSystem( - HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls") - ) - ); - extractor1.setIncludeSheetNames(true); - - String text = extractor1.getText(); - assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text); - - extractor1.setIncludeSheetNames(false); - extractor1.setFormulasNotResults(true); - - text = extractor1.getText(); - assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text); - } finally { - if (extractor1 != null) extractor1.close(); + try (EventBasedExcelExtractor extractor1 = new EventBasedExcelExtractor( + new POIFSFileSystem( + HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls") + ) + )) { + extractor1.setIncludeSheetNames(true); + + String text = extractor1.getText(); + assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text); + + extractor1.setIncludeSheetNames(false); + extractor1.setFormulasNotResults(true); + + text = extractor1.getText(); + assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text); } // Now, a slightly longer file with numeric formulas - EventBasedExcelExtractor extractor2 = null; - try { - extractor2 = new EventBasedExcelExtractor( - new POIFSFileSystem( - HSSFTestDataSamples.openSampleFileStream("sumifformula.xls") - ) - ); - - extractor2.setIncludeSheetNames(false); - extractor2.setFormulasNotResults(true); - - String text = extractor2.getText(); - assertEquals( - "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + - "2000\t2\n" + - "3000\t3\n" + - "4000\t4\n" + - "5000\t5\n", - text - ); - } finally { - if (extractor2 != null) extractor2.close(); + try (EventBasedExcelExtractor extractor2 = new EventBasedExcelExtractor( + new POIFSFileSystem( + HSSFTestDataSamples.openSampleFileStream("sumifformula.xls") + ) + )) { + + extractor2.setIncludeSheetNames(false); + extractor2.setFormulasNotResults(true); + + String text = extractor2.getText(); + assertEquals( + "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + + "2000\t2\n" + + "3000\t3\n" + + "4000\t4\n" + + "5000\t5\n", + text + ); } } @@ -372,4 +363,25 @@ public final class TestExcelExtractor { assertContains(txt, "NONBUSINESS"); } } + + @Test + public void test60405a() throws IOException { + //bug 61045. File is govdocs1 626534 + try (ExcelExtractor extractor = createExtractor("60405.xls")) { + String txt = extractor.getText(); + assertContains(txt, "Macro1"); + assertContains(txt, "Macro2"); + } + } + + @Test + public void test60405b() throws IOException { + //bug 61045. File is govdocs1 626534 + try (ExcelExtractor extractor = createExtractor("60405.xls")) { + extractor.setFormulasNotResults(true); + String txt = extractor.getText(); + assertContains(txt, "Macro1"); + assertContains(txt, "Macro2"); + } + } } diff --git a/src/testcases/org/apache/poi/ss/formula/function/ExcelCetabFunctionExtractor.java b/src/testcases/org/apache/poi/ss/formula/function/ExcelCetabFunctionExtractor.java new file mode 100644 index 0000000000..db87e07377 --- /dev/null +++ b/src/testcases/org/apache/poi/ss/formula/function/ExcelCetabFunctionExtractor.java @@ -0,0 +1,378 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.ss.formula.function; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * This class is not used during normal POI run-time but is used at development time to generate + * the file 'functionMetadataCetab.txt'. There are more than 300 built-in functions in Excel in + * the Cetab and the intention of this class is to make it easier to maintain the metadata, by extracting + * it from a reliable source. + */ +public final class ExcelCetabFunctionExtractor { + + private static final String SOURCE_DOC_FILE_NAME = "functionMetadataCetab-PDF.txt"; + + /** + * For simplicity, the output file is strictly simple ASCII. + * This method detects any unexpected characters. + */ + /* package */ static boolean isSimpleAscii(char c) { + + if (c>=0x21 && c<=0x7E) { + // everything from '!' to '~' (includes letters, digits, punctuation + return true; + } + // some specific whitespace chars below 0x21: + switch(c) { + case ' ': + case '\t': + case '\r': + case '\n': + return true; + } + return false; + } + + + private static final class FunctionData { + // special characters from the ooo document + private static final int CHAR_ELLIPSIS_8230 = 8230; + private static final int CHAR_NDASH_8211 = 8211; + + private final int _index; + private final boolean _hasFootnote; + private final String _name; + private final int _minParams; + private final int _maxParams; + private final String _returnClass; + private final String _paramClasses; + private final boolean _isVolatile; + + public FunctionData(int funcIx, boolean hasFootnote, String funcName, int minParams, int maxParams, + String returnClass, String paramClasses, boolean isVolatile) { + _index = funcIx; + _hasFootnote = hasFootnote; + _name = funcName; + _minParams = minParams; + _maxParams = maxParams; + _returnClass = convertSpecialChars(returnClass); + _paramClasses = convertSpecialChars(paramClasses); + _isVolatile = isVolatile; + } + private static String convertSpecialChars(String ss) { + StringBuilder sb = new StringBuilder(ss.length() + 4); + for(int i=0; i _allFunctionsByIndex; + private final Map _allFunctionsByName; + private final Set _groupFunctionIndexes; + private final Set _groupFunctionNames; + private final PrintStream _ps; + + public FunctionDataCollector(PrintStream ps) { + _ps = ps; + _allFunctionsByIndex = new HashMap<>(); + _allFunctionsByName = new HashMap<>(); + _groupFunctionIndexes = new HashSet<>(); + _groupFunctionNames = new HashSet<>(); + } + + public void addFunction(int funcIx, boolean hasFootnote, String funcName, int minParams, int maxParams, + String returnClass, String paramClasses, String volatileFlagStr) { + boolean isVolatile = volatileFlagStr.length() > 0; + + Integer funcIxKey = Integer.valueOf(funcIx); + if(!_groupFunctionIndexes.add(funcIxKey)) { + throw new RuntimeException("Duplicate function index (" + funcIx + ")"); + } + if(!_groupFunctionNames.add(funcName)) { + throw new RuntimeException("Duplicate function name '" + funcName + "'"); + } + + checkRedefinedFunction(hasFootnote, funcName, funcIxKey); + FunctionData fd = new FunctionData(funcIx, hasFootnote, funcName, + minParams, maxParams, returnClass, paramClasses, isVolatile); + + _allFunctionsByIndex.put(funcIxKey, fd); + _allFunctionsByName.put(funcName, fd); + } + + /** + * Some extra validation here. + * Any function which changes definition will have a footnote in the source document + */ + private void checkRedefinedFunction(boolean hasNote, String funcName, Integer funcIxKey) { + FunctionData fdPrev; + // check by index + fdPrev = _allFunctionsByIndex.get(funcIxKey); + if(fdPrev != null) { + if(!fdPrev.hasFootnote() || !hasNote) { + throw new RuntimeException("changing function [" + + funcIxKey + "] definition without foot-note"); + } + _allFunctionsByName.remove(fdPrev.getName()); + } + // check by name + fdPrev = _allFunctionsByName.get(funcName); + if(fdPrev != null) { + if(!fdPrev.hasFootnote() || !hasNote) { + throw new RuntimeException("changing function '" + + funcName + "' definition without foot-note"); + } + _allFunctionsByIndex.remove(Integer.valueOf(fdPrev.getIndex())); + } + } + + public void endTableGroup(String headingText) { + Integer[] keys = new Integer[_groupFunctionIndexes.size()]; + _groupFunctionIndexes.toArray(keys); + _groupFunctionIndexes.clear(); + _groupFunctionNames.clear(); + Arrays.sort(keys); + + _ps.println("# " + headingText); + for (Integer key : keys) { + FunctionData fd = _allFunctionsByIndex.get(key); + _ps.println(fd.formatAsDataLine()); + } + } + } + + private static final Pattern ID_MATCH = Pattern.compile("0x([\\dA-F]+)"); + private static final Pattern NAME_MATCH = Pattern.compile("([0-9A-Z.]+)"); + private static final Pattern ID_NAME_MATCH = Pattern.compile("0x([\\dA-F]+)\\s+([0-9A-Z.]+)"); + + private static final Set IGNORED_LINES = new HashSet<>(); + static { + IGNORED_LINES.add("[MS-XLS] — v20141018"); + IGNORED_LINES.add("Excel Binary File Format (.xls) Structure"); + IGNORED_LINES.add("Copyright © 2014 Microsoft Corporation."); + IGNORED_LINES.add("Release: October 30, 2014Value Meaning"); + IGNORED_LINES.add("Release: October 30, 2014Value"); + IGNORED_LINES.add("Meaning"); + } + + private static void extractFunctionData(FunctionDataCollector fdc, InputStream is) throws IOException { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) { + + String id = null; + String name = null; + while (true) { + String line = reader.readLine(); + if(line == null) { + break; + } + + if(IGNORED_LINES.contains(line) || line.matches("\\d+ / \\d+")) { + continue; + } + + Matcher idMatcher = ID_MATCH.matcher(line); + boolean foundID = idMatcher.matches(); + Matcher nameMatcher = NAME_MATCH.matcher(line); + boolean foundName = nameMatcher.matches(); + Matcher idAndNameMatcher = ID_NAME_MATCH.matcher(line); + boolean foundIDandName = idAndNameMatcher.matches(); + if(foundID && foundName || + foundName && foundIDandName || + foundID && foundIDandName) { + throw new IllegalStateException("Should not find id and name: " + + foundID + "/" + foundName + "/" + foundIDandName + + ", line: " + line); + } + + if(foundID && id != null) { + throw new IllegalStateException("Found ID, but already had one: " + id + ", line: " + line); + } + if(foundName && name != null) { + throw new IllegalStateException("Found name, but already had one: " + name + ", line: " + line); + } + if(foundIDandName && (name != null || id != null)) { + throw new IllegalStateException("Found name and id, but already had one: id: " + id + ", name: " + name + ", line: " + line); + } + + if(foundID) { + id = idMatcher.group(1); + } else if (foundName) { + name = nameMatcher.group(1); + } else if (foundIDandName) { + id = idAndNameMatcher.group(1); + name = idAndNameMatcher.group(2); + // manual override for one function name which contains lowercase characters + } else if(line.equals("VBAActivate")) { + name = line; + } else if (id == null || name == null) { + throw new IllegalStateException("Found params, but had empty id or name, id: " + id + + ", name: " + name + ", line: " + line); + } else { + System.out.println("Found function " + id + " " + name + " " + line); + fdc.addFunction(Integer.parseInt(id, 16), false, name, 0, 0, + "", "", ""); + + id = null; + name = null; + } + } + } + + fdc.endTableGroup(""); + } + /** + * To be sure that no tricky unicode chars make it through to the output file. + */ + private static final class SimpleAsciiOutputStream extends OutputStream { + + private final OutputStream _os; + + public SimpleAsciiOutputStream(OutputStream os) { + _os = os; + } + + @Override + public void write(int b) throws IOException { + checkByte(b); + _os.write(b); + } + + private static void checkByte(int b) { + if (!isSimpleAscii((char)b)) { + throw new RuntimeException("Encountered char (" + b + ") which was not simple ascii as expected"); + } + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + for (int i = 0; i < len; i++) { + checkByte(b[i + off]); + + } + _os.write(b, off, len); + } + } + + private static void processFile(InputStream input, File outFile) throws IOException { + try (OutputStream os = new SimpleAsciiOutputStream(new FileOutputStream(outFile)); + PrintStream ps = new PrintStream(os, true, "UTF-8")) { + + outputLicenseHeader(ps); + Class genClass = ExcelCetabFunctionExtractor.class; + ps.println("# Created by (" + genClass.getName() + ")"); + // identify the source file + ps.println("# from source file '" + SOURCE_DOC_FILE_NAME + "'"); + ps.println("#"); + ps.println("#Columns: (index, name, minParams, maxParams, returnClass, paramClasses, isVolatile, hasFootnote )"); + ps.println(); + extractFunctionData(new FunctionDataCollector(ps), input); + ps.close(); + + String canonicalOutputFileName = outFile.getCanonicalPath(); + System.out.println("Successfully output to '" + canonicalOutputFileName + "'"); + } + } + + private static void outputLicenseHeader(PrintStream ps) { + String[] lines= { + "Licensed to the Apache Software Foundation (ASF) under one or more", + "contributor license agreements. See the NOTICE file distributed with", + "this work for additional information regarding copyright ownership.", + "The ASF licenses this file to You under the Apache License, Version 2.0", + "(the \"License\"); you may not use this file except in compliance with", + "the License. You may obtain a copy of the License at", + "", + " http://www.apache.org/licenses/LICENSE-2.0", + "", + "Unless required by applicable law or agreed to in writing, software", + "distributed under the License is distributed on an \"AS IS\" BASIS,", + "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.", + "See the License for the specific language governing permissions and", + "limitations under the License.", + }; + for (String line : lines) { + ps.print("# "); + ps.println(line); + } + ps.println(); + } + + public static void main(String[] args) throws IOException { + if(!new File(SOURCE_DOC_FILE_NAME).exists()) { + throw new IllegalStateException("Did not find file " + SOURCE_DOC_FILE_NAME + " in the resources"); + } + + try (InputStream stream = new FileInputStream(SOURCE_DOC_FILE_NAME)) { + File outFile = new File("functionMetadataCetab.txt"); + + processFile(stream, outFile); + } + } +} diff --git a/test-data/spreadsheet/60405.xls b/test-data/spreadsheet/60405.xls new file mode 100644 index 0000000000..a63710428e Binary files /dev/null and b/test-data/spreadsheet/60405.xls differ