Bug 60405: Add initial support for cetab functions so some macros can be

parsed
Add some function-definitions for Excel 4 Macros and missing functions
found in regression tests

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1852277 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2019-01-27 09:57:39 +00:00
parent e59f9c6461
commit c376d662f9
13 changed files with 1016 additions and 110 deletions

View File

@ -31,9 +31,11 @@ import java.util.Set;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.extractor.POIOLE2TextExtractor;
import org.apache.poi.extractor.POITextExtractor;
import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
import org.apache.poi.ooxml.extractor.ExtractorFactory;
import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.ss.extractor.ExcelExtractor;
import org.apache.poi.util.IOUtils;
import org.apache.xmlbeans.XmlException;
@ -83,6 +85,7 @@ public abstract class AbstractFileHandler implements FileHandler {
long modified = file.lastModified();
POITextExtractor extractor = null;
String fileAndParentName = file.getParentFile().getName() + "/" + file.getName();
try {
extractor = ExtractorFactory.createExtractor(file);
assertNotNull("Should get a POITextExtractor but had none for file " + file, extractor);
@ -95,7 +98,7 @@ public abstract class AbstractFileHandler implements FileHandler {
assertNotNull(metadataExtractor.getText());
assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
EXPECTED_EXTRACTOR_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName()));
EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName));
assertEquals("File should not be modified by extractor", length, file.length());
assertEquals("File should not be modified by extractor", modified, file.lastModified());
@ -111,8 +114,24 @@ public abstract class AbstractFileHandler implements FileHandler {
assertNotNull(text);
}
}
// test again with including formulas and cell-comments as this caused some bugs
if(extractor instanceof ExcelExtractor &&
// comment-extraction and formula extraction are not well supported in event based extraction
!(extractor instanceof EventBasedExcelExtractor)) {
((ExcelExtractor)extractor).setFormulasNotResults(true);
String text = extractor.getText();
assertNotNull(text);
// */
((ExcelExtractor) extractor).setIncludeCellComments(true);
text = extractor.getText();
assertNotNull(text);
}
} catch (IllegalArgumentException e) {
if(!EXPECTED_EXTRACTOR_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())) {
if(!EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName)) {
throw e;
}
} catch (EncryptedDocumentException e) {

View File

@ -81,8 +81,7 @@ final class FunctionDataBuilder {
FunctionMetadata[] jumbledArray = new FunctionMetadata[_functionDataByName.size()];
_functionDataByName.values().toArray(jumbledArray);
FunctionMetadata[] fdIndexArray = new FunctionMetadata[_maxFunctionIndex+1];
for (int i = 0; i < jumbledArray.length; i++) {
FunctionMetadata fd = jumbledArray[i];
for (FunctionMetadata fd : jumbledArray) {
fdIndexArray[fd.getIndex()] = fd;
}

View File

@ -31,6 +31,7 @@ public final class FunctionMetadata {
* to make that file more version neutral.
* @see org.apache.poi.ss.formula.FormulaParser#validateNumArgs(int, FunctionMetadata)
*/
@SuppressWarnings("JavadocReference")
private static final short FUNCTION_MAX_PARAMS = 30;
private final int _index;
@ -49,27 +50,35 @@ public final class FunctionMetadata {
_returnClassCode = returnClassCode;
_parameterClassCodes = (parameterClassCodes == null) ? null : parameterClassCodes.clone();
}
public int getIndex() {
return _index;
}
public String getName() {
return _name;
}
public int getMinParams() {
return _minParams;
}
public int getMaxParams() {
return _maxParams;
}
public boolean hasFixedArgsLength() {
return _minParams == _maxParams;
}
public byte getReturnClassCode() {
return _returnClassCode;
}
public byte[] getParameterClassCodes() {
return _parameterClassCodes.clone();
}
/**
* Some varags functions (like VLOOKUP) have a specific limit to the number of arguments that
* can be passed. Other functions (like SUM) don't have such a limit. For those functions,
@ -80,11 +89,8 @@ public final class FunctionMetadata {
public boolean hasUnlimitedVarags() {
return FUNCTION_MAX_PARAMS == _maxParams;
}
public String toString() {
StringBuffer sb = new StringBuffer(64);
sb.append(getClass().getName()).append(" [");
sb.append(_index).append(" ").append(_name);
sb.append("]");
return sb.toString();
return getClass().getName() + " [" + _index + " " + _name + "]";
}
}

View File

@ -41,6 +41,7 @@ final class FunctionMetadataReader {
private static final int MAX_RECORD_LENGTH = 100_000;
private static final String METADATA_FILE_NAME = "functionMetadata.txt";
private static final String METADATA_FILE_NAME_CETAB = "functionMetadataCetab.txt";
/** plain ASCII text metadata file uses three dots for ellipsis */
private static final String ELLIPSIS = "...";
@ -52,51 +53,56 @@ final class FunctionMetadataReader {
private static final String[] DIGIT_ENDING_FUNCTION_NAMES = {
// Digits at the end of a function might be due to a left-over footnote marker.
// except in these cases
"LOG10", "ATAN2", "DAYS360", "SUMXMY2", "SUMX2MY2", "SUMX2PY2",
"LOG10", "ATAN2", "DAYS360", "SUMXMY2", "SUMX2MY2", "SUMX2PY2", "A1.R1C1",
};
private static final Set<String> DIGIT_ENDING_FUNCTION_NAMES_SET = new HashSet<>(Arrays.asList(DIGIT_ENDING_FUNCTION_NAMES));
public static FunctionMetadataRegistry createRegistry() {
try {
InputStream is = FunctionMetadataReader.class.getResourceAsStream(METADATA_FILE_NAME);
if (is == null) {
throw new RuntimeException("resource '" + METADATA_FILE_NAME + "' not found");
}
try {
try(BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
FunctionDataBuilder fdb = new FunctionDataBuilder(400);
while (true) {
String line = br.readLine();
if (line == null) {
break;
}
if (line.length() < 1 || line.charAt(0) == '#') {
continue;
}
String trimLine = line.trim();
if (trimLine.length() < 1) {
continue;
}
processLine(fdb, line);
}
FunctionDataBuilder fdb = new FunctionDataBuilder(800);
readResourceFile(fdb, METADATA_FILE_NAME);
return fdb.build();
}
return fdb.build();
}
} finally {
is.close();
}
} catch (IOException e) {
throw new RuntimeException(e);
}
public static FunctionMetadataRegistry createRegistryCetab() {
FunctionDataBuilder fdb = new FunctionDataBuilder(800);
readResourceFile(fdb, METADATA_FILE_NAME_CETAB);
return fdb.build();
}
private static void readResourceFile(FunctionDataBuilder fdb, String resourceFile) {
try (InputStream is = FunctionMetadataReader.class.getResourceAsStream(resourceFile)) {
if (is == null) {
throw new RuntimeException("resource '" + resourceFile + "' not found");
}
try(BufferedReader br = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
while (true) {
String line = br.readLine();
if (line == null) {
break;
}
if (line.length() < 1 || line.charAt(0) == '#') {
continue;
}
String trimLine = line.trim();
if (trimLine.length() < 1) {
continue;
}
processLine(fdb, line);
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private static void processLine(FunctionDataBuilder fdb, String line) {
String[] parts = TAB_DELIM_PATTERN.split(line, -2);
if(parts.length != 8) {
throw new RuntimeException("Bad line format '" + line + "' - expected 8 data fields");
throw new RuntimeException("Bad line format '" + line + "' - expected 8 data fields delimited by tab, " +
"but had " + parts.length + ": " + Arrays.toString(parts));
}
int functionIndex = parseInt(parts[0]);
String functionName = parts[1];

View File

@ -37,6 +37,7 @@ public final class FunctionMetadataRegistry {
public static final short FUNCTION_INDEX_EXTERNAL = 255;
private static FunctionMetadataRegistry _instance;
private static FunctionMetadataRegistry _instanceCetab;
private final FunctionMetadata[] _functionDataByIndex;
private final Map<String, FunctionMetadata> _functionDataByName;
@ -48,6 +49,13 @@ public final class FunctionMetadataRegistry {
return _instance;
}
private static FunctionMetadataRegistry getInstanceCetab() {
if (_instanceCetab == null) {
_instanceCetab = FunctionMetadataReader.createRegistryCetab();
}
return _instanceCetab;
}
/* package */ FunctionMetadataRegistry(FunctionMetadata[] functionDataByIndex, Map<String, FunctionMetadata> functionDataByName) {
_functionDataByIndex = (functionDataByIndex == null) ? null : functionDataByIndex.clone();
_functionDataByName = functionDataByName;
@ -62,6 +70,10 @@ public final class FunctionMetadataRegistry {
return getInstance().getFunctionByIndexInternal(index);
}
public static FunctionMetadata getCetabFunctionByIndex(int index) {
return getInstanceCetab().getFunctionByIndexInternal(index);
}
private FunctionMetadata getFunctionByIndexInternal(int index) {
return _functionDataByIndex[index];
}
@ -74,7 +86,11 @@ public final class FunctionMetadataRegistry {
public static short lookupIndexByName(String name) {
FunctionMetadata fd = getInstance().getFunctionByNameInternal(name);
if (fd == null) {
return -1;
// also try the cetab functions
fd = getInstanceCetab().getFunctionByNameInternal(name);
if (fd == null) {
return -1;
}
}
return (short) fd.getIndex();
}
@ -83,8 +99,12 @@ public final class FunctionMetadataRegistry {
return _functionDataByName.get(name);
}
public static FunctionMetadata getFunctionByName(String name) {
return getInstance().getFunctionByNameInternal(name);
FunctionMetadata fm = getInstance().getFunctionByNameInternal(name);
if(fm == null) {
return getInstanceCetab().getFunctionByNameInternal(name);
}
return fm;
}
}

View File

@ -123,13 +123,22 @@ public abstract class AbstractFunctionPtg extends OperationPtg {
return ix >= 0;
}
protected final String lookupName(short index) {
protected String lookupName(short index) {
return lookupName(index, false);
}
protected final String lookupName(short index, boolean isCetab) {
if(index == FunctionMetadataRegistry.FUNCTION_INDEX_EXTERNAL) {
return "#external#";
}
FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByIndex(index);
final FunctionMetadata fm;
if(isCetab) {
fm = FunctionMetadataRegistry.getCetabFunctionByIndex(index);
} else {
fm = FunctionMetadataRegistry.getFunctionByIndex(index);
}
if(fm == null) {
throw new RuntimeException("bad function index (" + index + ")");
throw new RuntimeException("bad function index (" + index + ", " + isCetab + ")");
}
return fm.getName();
}

View File

@ -69,9 +69,6 @@ public final class ExpPtg extends ControlPtg {
@Override
public String toString() {
StringBuffer buffer = new StringBuffer("[Array Formula or Shared Formula]\n");
buffer.append("row = ").append(getRow()).append("\n");
buffer.append("col = ").append(getColumn()).append("\n");
return buffer.toString();
return "[Array Formula or Shared Formula]\n" + "row = " + getRow() + "\n" + "col = " + getColumn() + "\n";
}
}

View File

@ -18,6 +18,8 @@
package org.apache.poi.ss.formula.ptg;
import org.apache.poi.ss.formula.function.FunctionMetadata;
import org.apache.poi.ss.formula.function.FunctionMetadataRegistry;
import org.apache.poi.util.BitField;
import org.apache.poi.util.BitFieldFactory;
import org.apache.poi.util.LittleEndianInput;
import org.apache.poi.util.LittleEndianOutput;
@ -25,24 +27,29 @@ import org.apache.poi.util.LittleEndianOutput;
* @author Jason Height (jheight at chariot dot net dot au)
*/
public final class FuncVarPtg extends AbstractFunctionPtg{
public final static byte sid = 0x22;
private final static int SIZE = 4;
// See spec at 2.5.198.63 PtgFuncVar
private static final BitField ceFunc = BitFieldFactory.getInstance(0xF000);
/**
* Single instance of this token for 'sum() taking a single argument'
*/
public static final OperationPtg SUM = FuncVarPtg.create("SUM", 1);
private FuncVarPtg(int functionIndex, int returnClass, byte[] paramClasses, int numArgs) {
private final boolean _isCetab;
private FuncVarPtg(int functionIndex, int returnClass, byte[] paramClasses, int numArgs, boolean isCetab) {
super(functionIndex, returnClass, paramClasses, numArgs);
_isCetab = isCetab;
}
/**Creates new function pointer from a byte array
* usually called while reading an excel file.
*/
public static FuncVarPtg create(LittleEndianInput in) {
return create(in.readByte(), in.readShort());
return create(in.readByte(), in.readUShort());
}
/**
@ -53,12 +60,25 @@ public final class FuncVarPtg extends AbstractFunctionPtg{
}
private static FuncVarPtg create(int numArgs, int functionIndex) {
FunctionMetadata fm = FunctionMetadataRegistry.getFunctionByIndex(functionIndex);
if(fm == null) {
// Happens only as a result of a call to FormulaParser.parse(), with a non-built-in function name
return new FuncVarPtg(functionIndex, Ptg.CLASS_VALUE, new byte[] {Ptg.CLASS_VALUE}, numArgs);
final FunctionMetadata fm;
boolean isCetab = ceFunc.isSet(functionIndex);
if(isCetab) {
functionIndex = ceFunc.clear(functionIndex);
fm = FunctionMetadataRegistry.getCetabFunctionByIndex(functionIndex);
} else {
fm = FunctionMetadataRegistry.getFunctionByIndex(functionIndex);
}
return new FuncVarPtg(functionIndex, fm.getReturnClassCode(), fm.getParameterClassCodes(), numArgs);
if (fm == null) {
// Happens only as a result of a call to FormulaParser.parse(), with a non-built-in function name
return new FuncVarPtg(functionIndex, Ptg.CLASS_VALUE, new byte[]{Ptg.CLASS_VALUE}, numArgs, isCetab);
}
return new FuncVarPtg(functionIndex, fm.getReturnClassCode(), fm.getParameterClassCodes(), numArgs, isCetab);
}
@Override
protected String lookupName(short index) {
return lookupName(index, _isCetab);
}
public void write(LittleEndianOutput out) {

View File

@ -74,6 +74,8 @@
50 TREND 1 3 A R R R x
51 LOGEST 1 2 A R R x
52 GROWTH 1 3 A R R R x
53 GOTO 1 1 R R
55 RETURN 1 1 V V
56 PV 3 5 V V V V V V
# Built-In Sheet Functions in BIFF2
57 FV 3 5 V V V V V V
@ -98,8 +100,13 @@
76 ROWS 1 1 V A
77 COLUMNS 1 1 V A
78 OFFSET 3 5 R R V V V V x
79 ABSREF 2 2 R V R
80 RELREF 2 2 R V V
81 ARGUMENT 0 3 V V V R
82 SEARCH 2 3 V V V V
83 TRANSPOSE 1 1 A A
84 ERROR 0 2 V V R
85 STEP 0 0
86 TYPE 1 1 V V
97 ATAN2 2 2 V V V
98 ASIN 1 1 V V
@ -109,6 +116,7 @@
102 VLOOKUP 3 3 V V R R x
105 ISREF 1 1 V R
109 LOG 1 2 V V V
110 EXEC 1 4 V V V V V
111 CHAR 1 1 V V
112 LOWER 1 1 V V
113 UPPER 1 1 V V
@ -134,6 +142,7 @@
143 SYD 4 4 V V V V V
144 DDB 4 5 V V V V V V
148 INDIRECT 1 2 R V V x
150 CALL 1 3 V V R R
162 CLEAN 1 1 V V
163 MDETERM 1 1 V A
164 MINVERSE 1 1 A A
@ -143,6 +152,10 @@
169 COUNTA 0 30 V R
183 PRODUCT 0 30 V R
184 FACT 1 1 V V
185 GET.CELL 1 2 V V R
186 GET.WORKSPACE 1 1 V V
187 GET.WINDOW 1 2 V V V
188 GET.DOCUMENT 1 2 V V V
189 DPRODUCT 3 3 V R R R
190 ISNONTEXT 1 1 V V
193 STDEVP 1 30 V R
@ -174,6 +187,7 @@
220 DAYS360 2 2 V V V x
221 TODAY 0 0 V - x
222 VDB 5 7 V V V V V V V V
225 END.IF 0 0
227 MEDIAN 1 30 V R ...
228 SUMPRODUCT 1 30 V A ...
229 SINH 1 1 V V
@ -187,11 +201,20 @@
# New Built-In Sheet Functions in BIFF4
14 FIXED 1 3 V V V V x
204 USDOLLAR 1 2 V V V x
238 LAST.ERROR 0 0
215 DBCS 1 1 V V x
216 RANK 2 3 V V R V
247 DB 4 5 V V V V V V
252 FREQUENCY 2 2 A R R
257 EVALUATE 1 1 V V
261 ERROR.TYPE 1 1 V V
262 APP.TITLE 0 1 V V
263 WINDOW.TITLE 0 1 V V
264 SAVE.TOOLBAR 0 2 V V V
265 ENABLE.TOOL 3 3 V V V V
266 PRESS.TOOL 3 3 V V V V
267 REGISTER.ID 2 3 V V V V
268 GET.WORKBOOK 1 2 V V V
269 AVEDEV 1 30 V R ...
270 BETADIST 3 5 V V V V V V
271 GAMMALN 1 1 V V
@ -272,13 +295,13 @@
350 ISPMT 4 4 V V V V V
351 DATEDIF 3 3 V V V V
352 DATESTRING 1 1 V V
353 NUMBERSTRING 2 2 V V V
354 ROMAN 1 2 V V V
# New Built-In Sheet Functions in BIFF8
358 GETPIVOTDATA 2 30 V V R ...
359 HYPERLINK 1 2 V V V
360 PHONETIC 1 1 V R
361 AVERAGEA 1 30 V R ...
353 NUMBERSTRING 2 2 V V V
354 ROMAN 1 2 V V V
# New Built-In Sheet Functions in BIFF8
358 GETPIVOTDATA 2 30 V V R ...
359 HYPERLINK 1 2 V V V
360 PHONETIC 1 1 V R
361 AVERAGEA 1 30 V R ...
362 MAXA 1 30 V R ...
363 MINA 1 30 V R ...
364 STDEVPA 1 30 V R ...

View File

@ -0,0 +1,417 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Created by (org.apache.poi.ss.formula.function.ExcelCetabFunctionExtractor)
# from source file 'org/apache/poi/ss/formula/function/functionMetadataCetab-PDF.txt'
#
#Columns: (index, name, minParams, maxParams, returnClass, paramClasses, isVolatile, hasFootnote )
#
0 BEEP 0 0
1 OPEN 0 0
2 OPEN.LINKS 0 0
3 CLOSE.ALL 0 0
4 SAVE 0 0
5 SAVE.AS 0 0
6 FILE.DELETE 0 0
7 PAGE.SETUP 0 0
8 PRINT 0 0
9 PRINTER.SETUP 0 0
10 QUIT 0 0
11 NEW.WINDOW 0 0
12 ARRANGE.ALL 0 0
13 WINDOW.SIZE 0 0
14 WINDOW.MOVE 0 0
15 FULL 0 0
16 CLOSE 0 0
17 RUN 0 0
22 SET.PRINT.AREA 0 0
23 SET.PRINT.TITLES 0 0
24 SET.PAGE.BREAK 0 0
25 REMOVE.PAGE.BREAK 0 0
26 FONT 0 0
27 DISPLAY 0 0
28 PROTECT.DOCUMENT 0 0
29 PRECISION 0 0
30 A1.R1C1 0 0
31 CALCULATE.NOW 0 0
32 CALCULATION 0 0
34 DATA.FIND 0 0
35 EXTRACT 0 0
36 DATA.DELETE 0 0
37 SET.DATABASE 0 0
38 SET.CRITERIA 0 0
39 SORT 0 0
40 DATA.SERIES 0 0
41 TABLE 0 0
42 FORMAT.NUMBER 0 0
43 ALIGNMENT 0 0
44 STYLE 0 0
45 BORDER 0 0
46 CELL.PROTECTION 0 0
47 COLUMN.WIDTH 0 0
48 UNDO 0 0
49 CUT 0 0
50 COPY 0 0
51 PASTE 0 0
52 CLEAR 0 0
53 PASTE.SPECIAL 0 0
54 EDIT.DELETE 0 0
55 INSERT 0 0
56 FILL.RIGHT 0 0
57 FILL.DOWN 0 0
61 DEFINE.NAME 0 0
62 CREATE.NAMES 0 0
63 FORMULA.GOTO 0 0
64 FORMULA.FIND 0 0
65 SELECT.LAST.CELL 0 0
66 SHOW.ACTIVE.CELL 0 0
67 GALLERY.AREA 0 0
68 GALLERY.BAR 0 0
69 GALLERY.COLUMN 0 0
70 GALLERY.LINE 0 0
71 GALLERY.PIE 0 0
72 GALLERY.SCATTER 0 0
73 COMBINATION 0 0
74 PREFERRED 0 0
75 ADD.OVERLAY 0 0
76 GRIDLINES 0 0
77 SET.PREFERRED 0 0
78 AXES 0 0
79 LEGEND 0 0
80 ATTACH.TEXT 0 0
81 ADD.ARROW 0 0
82 SELECT.CHART 0 0
83 SELECT.PLOT.AREA 0 0
84 PATTERNS 0 0
85 MAIN.CHART 0 0
86 OVERLAY 0 0
87 SCALE 0 0
88 FORMAT.LEGEND 0 0
89 FORMAT.TEXT 0 0
90 EDIT.REPEAT 0 0
91 PARSE 0 0
92 JUSTIFY 0 0
93 HIDE 0 0
94 UNHIDE 0 0
95 WORKSPACE 0 0
96 FORMULA 0 0
97 FORMULA.FILL 0 0
98 FORMULA.ARRAY 0 0
99 DATA.FIND.NEXT 0 0
100 DATA.FIND.PREV 0 0
101 FORMULA.FIND.NEXT 0 0
102 FORMULA.FIND.PREV 0 0
103 ACTIVATE 0 0
104 ACTIVATE.NEXT 0 0
105 ACTIVATE.PREV 0 0
106 UNLOCKED.NEXT 0 0
107 UNLOCKED.PREV 0 0
108 COPY.PICTURE 0 0
109 SELECT 0 0
110 DELETE.NAME 0 0
111 DELETE.FORMAT 0 0
112 VLINE 0 0
113 HLINE 0 0
114 VPAGE 0 0
115 HPAGE 0 0
116 VSCROLL 0 0
117 HSCROLL 0 0
118 ALERT 0 0
119 NEW 0 0
120 CANCEL.COPY 0 0
121 SHOW.CLIPBOARD 0 0
122 MESSAGE 0 0
124 PASTE.LINK 0 0
125 APP.ACTIVATE 0 0
126 DELETE.ARROW 0 0
127 ROW.HEIGHT 0 0
128 FORMAT.MOVE 0 0
129 FORMAT.SIZE 0 0
130 FORMULA.REPLACE 0 0
131 SEND.KEYS 0 0
132 SELECT.SPECIAL 0 0
133 APPLY.NAMES 0 0
134 REPLACE.FONT 0 0
135 FREEZE.PANES 0 0
136 SHOW.INFO 0 0
137 SPLIT 0 0
138 ON.WINDOW 0 0
139 ON.DATA 0 0
140 DISABLE.INPUT 0 0
142 OUTLINE 0 0
143 LIST.NAMES 0 0
144 FILE.CLOSE 0 0
145 SAVE.WORKBOOK 0 0
146 DATA.FORM 0 0
147 COPY.CHART 0 0
148 ON.TIME 0 0
149 WAIT 0 0
150 FORMAT.FONT 0 0
151 FILL.UP 0 0
152 FILL.LEFT 0 0
153 DELETE.OVERLAY 0 0
155 SHORT.MENUS 0 0
159 SET.UPDATE.STATUS 0 0
161 COLOR.PALETTE 0 0
162 DELETE.STYLE 0 0
163 WINDOW.RESTORE 0 0
164 WINDOW.MAXIMIZE 0 0
166 CHANGE.LINK 0 0
167 CALCULATE.DOCUMENT 0 0
168 ON.KEY 0 0
169 APP.RESTORE 0 0
170 APP.MOVE 0 0
171 APP.SIZE 0 0
172 APP.MINIMIZE 0 0
173 APP.MAXIMIZE 0 0
174 BRING.TO.FRONT 0 0
175 SEND.TO.BACK 0 0
185 MAIN.CHART.TYPE 0 0
186 OVERLAY.CHART.TYPE 0 0
187 SELECT.END 0 0
188 OPEN.MAIL 0 0
189 SEND.MAIL 0 0
190 STANDARD.FONT 0 0
191 CONSOLIDATE 0 0
192 SORT.SPECIAL 0 0
193 GALLERY.3D.AREA 0 0
194 GALLERY.3D.COLUMN 0 0
195 GALLERY.3D.LINE 0 0
196 GALLERY.3D.PIE 0 0
197 VIEW.3D 0 0
198 GOAL.SEEK 0 0
199 WORKGROUP 0 0
200 FILL.GROUP 0 0
201 UPDATE.LINK 0 0
202 PROMOTE 0 0
203 DEMOTE 0 0
204 SHOW.DETAIL 0 0
206 UNGROUP 0 0
207 OBJECT.PROPERTIES 0 0
208 SAVE.NEW.OBJECT 0 0
209 SHARE 0 0
210 SHARE.NAME 0 0
211 DUPLICATE 0 0
212 APPLY.STYLE 0 0
213 ASSIGN.TO.OBJECT 0 0
214 OBJECT.PROTECTION 0 0
215 HIDE.OBJECT 0 0
216 SET.EXTRACT 0 0
217 CREATE.PUBLISHER 0 0
218 SUBSCRIBE.TO 0 0
219 ATTRIBUTES 0 0
220 SHOW.TOOLBAR 0 0
222 PRINT.PREVIEW 0 0
223 EDIT.COLOR 0 0
224 SHOW.LEVELS 0 0
225 FORMAT.MAIN 0 0
226 FORMAT.OVERLAY 0 0
227 ON.RECALC 0 0
228 EDIT.SERIES 0 0
229 DEFINE.STYLE 0 0
240 LINE.PRINT 0 0
243 ENTER.DATA 0 0
249 GALLERY.RADAR 0 0
250 MERGE.STYLES 0 0
251 EDITION.OPTIONS 0 0
252 PASTE.PICTURE 0 0
253 PASTE.PICTURE.LINK 0 0
254 SPELLING 0 0
256 ZOOM 0 0
259 INSERT.OBJECT 0 0
260 WINDOW.MINIMIZE 0 0
265 SOUND.NOTE 0 0
266 SOUND.PLAY 0 0
267 FORMAT.SHAPE 0 0
268 EXTEND.POLYGON 0 0
269 FORMAT.AUTO 0 0
272 GALLERY.3D.BAR 0 0
273 GALLERY.3D.SURFACE 0 0
274 FILL.AUTO 0 0
276 CUSTOMIZE.TOOLBAR 0 0
277 ADD.TOOL 0 0
278 EDIT.OBJECT 0 0
279 ON.DOUBLECLICK 0 0
280 ON.ENTRY 0 0
281 WORKBOOK.ADD 0 0
282 WORKBOOK.MOVE 0 0
283 WORKBOOK.COPY 0 0
284 WORKBOOK.OPTIONS 0 0
285 SAVE.WORKSPACE 0 0
288 CHART.WIZARD 0 0
289 DELETE.TOOL 0 0
290 MOVE.TOOL 0 0
291 WORKBOOK.SELECT 0 0
292 WORKBOOK.ACTIVATE 0 0
293 ASSIGN.TO.TOOL 0 0
295 COPY.TOOL 0 0
296 RESET.TOOL 0 0
297 CONSTRAIN.NUMERIC 0 0
298 PASTE.TOOL 0 0
302 WORKBOOK.NEW 0 0
305 SCENARIO.CELLS 0 0
306 SCENARIO.DELETE 0 0
307 SCENARIO.ADD 0 0
308 SCENARIO.EDIT 0 0
309 SCENARIO.SHOW 0 0
310 SCENARIO.SHOW.NEXT 0 0
311 SCENARIO.SUMMARY 0 0
312 PIVOT.TABLE.WIZARD 0 0
313 PIVOT.FIELD.PROPERTIES 0 0
314 PIVOT.FIELD 0 0
315 PIVOT.ITEM 0 0
316 PIVOT.ADD.FIELDS 0 0
318 OPTIONS.CALCULATION 0 0
319 OPTIONS.EDIT 0 0
320 OPTIONS.VIEW 0 0
321 ADDIN.MANAGER 0 0
322 MENU.EDITOR 0 0
323 ATTACH.TOOLBARS 0 0
324 VBAActivate 0 0
325 OPTIONS.CHART 0 0
328 VBA.INSERT.FILE 0 0
330 VBA.PROCEDURE.DEFINITION 0 0
336 ROUTING.SLIP 0 0
338 ROUTE.DOCUMENT 0 0
339 MAIL.LOGON 0 0
342 INSERT.PICTURE 0 0
343 EDIT.TOOL 0 0
344 GALLERY.DOUGHNUT 0 0
350 CHART.TREND 0 0
352 PIVOT.ITEM.PROPERTIES 0 0
354 WORKBOOK.INSERT 0 0
355 OPTIONS.TRANSITION 0 0
356 OPTIONS.GENERAL 0 0
370 FILTER.ADVANCED 0 0
373 MAIL.ADD.MAILER 0 0
374 MAIL.DELETE.MAILER 0 0
375 MAIL.REPLY 0 0
376 MAIL.REPLY.ALL 0 0
377 MAIL.FORWARD 0 0
378 MAIL.NEXT.LETTER 0 0
379 DATA.LABEL 0 0
380 INSERT.TITLE 0 0
381 FONT.PROPERTIES 0 0
382 MACRO.OPTIONS 0 0
383 WORKBOOK.HIDE 0 0
384 WORKBOOK.UNHIDE 0 0
385 WORKBOOK.DELETE 0 0
386 WORKBOOK.NAME 0 0
388 GALLERY.CUSTOM 0 0
390 ADD.CHART.AUTOFORMAT 0 0
391 DELETE.CHART.AUTOFORMAT 0 0
392 CHART.ADD.DATA 0 0
393 AUTO.OUTLINE 0 0
394 TAB.ORDER 0 0
395 SHOW.DIALOG 0 0
396 SELECT.ALL 0 0
397 UNGROUP.SHEETS 0 0
398 SUBTOTAL.CREATE 0 0
399 SUBTOTAL.REMOVE 0 0
400 RENAME.OBJECT 0 0
412 WORKBOOK.SCROLL 0 0
413 WORKBOOK.NEXT 0 0
414 WORKBOOK.PREV 0 0
415 WORKBOOK.TAB.SPLIT 0 0
416 FULL.SCREEN 0 0
417 WORKBOOK.PROTECT 0 0
420 SCROLLBAR.PROPERTIES 0 0
421 PIVOT.SHOW.PAGES 0 0
422 TEXT.TO.COLUMNS 0 0
423 FORMAT.CHARTTYPE 0 0
424 LINK.FORMAT 0 0
425 TRACER.DISPLAY 0 0
430 TRACER.NAVIGATE 0 0
431 TRACER.CLEAR 0 0
432 TRACER.ERROR 0 0
433 PIVOT.FIELD.GROUP 0 0
434 PIVOT.FIELD.UNGROUP 0 0
435 CHECKBOX.PROPERTIES 0 0
436 LABEL.PROPERTIES 0 0
437 LISTBOX.PROPERTIES 0 0
438 EDITBOX.PROPERTIES 0 0
439 PIVOT.REFRESH 0 0
440 LINK.COMBO 0 0
441 OPEN.TEXT 0 0
442 HIDE.DIALOG 0 0
443 SET.DIALOG.FOCUS 0 0
444 ENABLE.OBJECT 0 0
445 PUSHBUTTON.PROPERTIES 0 0
446 SET.DIALOG.DEFAULT 0 0
447 FILTER 0 0
448 FILTER.SHOW.ALL 0 0
449 CLEAR.OUTLINE 0 0
450 FUNCTION.WIZARD 0 0
451 ADD.LIST.ITEM 0 0
452 SET.LIST.ITEM 0 0
453 REMOVE.LIST.ITEM 0 0
454 SELECT.LIST.ITEM 0 0
455 SET.CONTROL.VALUE 0 0
456 SAVE.COPY.AS 0 0
458 OPTIONS.LISTS.ADD 0 0
459 OPTIONS.LISTS.DELETE 0 0
460 SERIES.AXES 0 0
461 SERIES.X 0 0
462 SERIES.Y 0 0
463 ERRORBAR.X 0 0
464 ERRORBAR.Y 0 0
465 FORMAT.CHART 0 0
466 SERIES.ORDER 0 0
467 MAIL.LOGOFF 0 0
468 CLEAR.ROUTING.SLIP 0 0
469 APP.ACTIVATE.MICROSOFT 0 0
470 MAIL.EDIT.MAILER 0 0
471 ON.SHEET 0 0
472 STANDARD.WIDTH 0 0
473 SCENARIO.MERGE 0 0
474 SUMMARY.INFO 0 0
475 FIND.FILE 0 0
476 ACTIVE.CELL.FONT 0 0
477 ENABLE.TIPWIZARD 0 0
478 VBA.MAKE.ADDIN 0 0
480 INSERTDATATABLE 0 0
481 WORKGROUP.OPTIONS 0 0
482 MAIL.SEND.MAILER 0 0
485 AUTOCORRECT 0 0
489 POST.DOCUMENT 0 0
491 PICKLIST 0 0
493 VIEW.SHOW 0 0
494 VIEW.DEFINE 0 0
495 VIEW.DELETE 0 0
509 SHEET.BACKGROUND 0 0
510 INSERT.MAP.OBJECT 0 0
511 OPTIONS.MENONO 0 0
517 MSOCHECKS 0 0
518 NORMAL 0 0
519 LAYOUT 0 0
520 RM.PRINT.AREA 0 0
521 CLEAR.PRINT.AREA 0 0
522 ADD.PRINT.AREA 0 0
523 MOVE.BRK 0 0
545 HIDECURR.NOTE 0 0
546 HIDEALL.NOTES 0 0
547 DELETE.NOTE 0 0
548 TRAVERSE.NOTES 0 0
549 ACTIVATE.NOTES 0 0
620 PROTECT.REVISIONS 0 0
621 UNPROTECT.REVISIONS 0 0
647 OPTIONS.ME 0 0
653 WEB.PUBLISH 0 0
667 NEWWEBQUERY 0 0
673 PIVOT.TABLE.CHART 0 0
753 OPTIONS.SAVE 0 0
755 OPTIONS.SPELL 0 0
808 HIDEALL.INKANNOTS 0 0

View File

@ -34,7 +34,6 @@ import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.LocaleUtil;
import org.junit.After;
import org.junit.Test;
/**
@ -130,50 +129,42 @@ public final class TestExcelExtractor {
public void testEventExtractor() throws Exception {
// First up, a simple file with string
// based formulas in it
EventBasedExcelExtractor extractor1 = null;
try {
extractor1 = new EventBasedExcelExtractor(
new POIFSFileSystem(
HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
)
);
extractor1.setIncludeSheetNames(true);
String text = extractor1.getText();
assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
extractor1.setIncludeSheetNames(false);
extractor1.setFormulasNotResults(true);
text = extractor1.getText();
assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
} finally {
if (extractor1 != null) extractor1.close();
try (EventBasedExcelExtractor extractor1 = new EventBasedExcelExtractor(
new POIFSFileSystem(
HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
)
)) {
extractor1.setIncludeSheetNames(true);
String text = extractor1.getText();
assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
extractor1.setIncludeSheetNames(false);
extractor1.setFormulasNotResults(true);
text = extractor1.getText();
assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
}
// Now, a slightly longer file with numeric formulas
EventBasedExcelExtractor extractor2 = null;
try {
extractor2 = new EventBasedExcelExtractor(
new POIFSFileSystem(
HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
)
);
extractor2.setIncludeSheetNames(false);
extractor2.setFormulasNotResults(true);
String text = extractor2.getText();
assertEquals(
"1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
"2000\t2\n" +
"3000\t3\n" +
"4000\t4\n" +
"5000\t5\n",
text
);
} finally {
if (extractor2 != null) extractor2.close();
try (EventBasedExcelExtractor extractor2 = new EventBasedExcelExtractor(
new POIFSFileSystem(
HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
)
)) {
extractor2.setIncludeSheetNames(false);
extractor2.setFormulasNotResults(true);
String text = extractor2.getText();
assertEquals(
"1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
"2000\t2\n" +
"3000\t3\n" +
"4000\t4\n" +
"5000\t5\n",
text
);
}
}
@ -372,4 +363,25 @@ public final class TestExcelExtractor {
assertContains(txt, "NONBUSINESS");
}
}
@Test
public void test60405a() throws IOException {
//bug 61045. File is govdocs1 626534
try (ExcelExtractor extractor = createExtractor("60405.xls")) {
String txt = extractor.getText();
assertContains(txt, "Macro1");
assertContains(txt, "Macro2");
}
}
@Test
public void test60405b() throws IOException {
//bug 61045. File is govdocs1 626534
try (ExcelExtractor extractor = createExtractor("60405.xls")) {
extractor.setFormulasNotResults(true);
String txt = extractor.getText();
assertContains(txt, "Macro1");
assertContains(txt, "Macro2");
}
}
}

View File

@ -0,0 +1,378 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.ss.formula.function;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* This class is not used during normal POI run-time but is used at development time to generate
* the file 'functionMetadataCetab.txt'. There are more than 300 built-in functions in Excel in
* the Cetab and the intention of this class is to make it easier to maintain the metadata, by extracting
* it from a reliable source.
*/
public final class ExcelCetabFunctionExtractor {
private static final String SOURCE_DOC_FILE_NAME = "functionMetadataCetab-PDF.txt";
/**
* For simplicity, the output file is strictly simple ASCII.
* This method detects any unexpected characters.
*/
/* package */ static boolean isSimpleAscii(char c) {
if (c>=0x21 && c<=0x7E) {
// everything from '!' to '~' (includes letters, digits, punctuation
return true;
}
// some specific whitespace chars below 0x21:
switch(c) {
case ' ':
case '\t':
case '\r':
case '\n':
return true;
}
return false;
}
private static final class FunctionData {
// special characters from the ooo document
private static final int CHAR_ELLIPSIS_8230 = 8230;
private static final int CHAR_NDASH_8211 = 8211;
private final int _index;
private final boolean _hasFootnote;
private final String _name;
private final int _minParams;
private final int _maxParams;
private final String _returnClass;
private final String _paramClasses;
private final boolean _isVolatile;
public FunctionData(int funcIx, boolean hasFootnote, String funcName, int minParams, int maxParams,
String returnClass, String paramClasses, boolean isVolatile) {
_index = funcIx;
_hasFootnote = hasFootnote;
_name = funcName;
_minParams = minParams;
_maxParams = maxParams;
_returnClass = convertSpecialChars(returnClass);
_paramClasses = convertSpecialChars(paramClasses);
_isVolatile = isVolatile;
}
private static String convertSpecialChars(String ss) {
StringBuilder sb = new StringBuilder(ss.length() + 4);
for(int i=0; i<ss.length(); i++) {
char c = ss.charAt(i);
if (isSimpleAscii(c)) {
sb.append(c);
continue;
}
switch (c) {
case CHAR_NDASH_8211:
sb.append('-');
continue;
case CHAR_ELLIPSIS_8230:
sb.append("...");
continue;
}
throw new RuntimeException("bad char (" + ((int)c) + ") in string '" + ss + "'");
}
return sb.toString();
}
public int getIndex() {
return _index;
}
public String getName() {
return _name;
}
public boolean hasFootnote() {
return _hasFootnote;
}
public String formatAsDataLine() {
return _index + "\t" + _name + "\t" + _minParams + "\t"
+ _maxParams + "\t" + _returnClass + "\t" + _paramClasses
+ "\t" + checkMark(_isVolatile) + "\t" + checkMark(_hasFootnote);
}
private static String checkMark(boolean b) {
return b ? "x" : "";
}
}
private static final class FunctionDataCollector {
private final Map<Integer, FunctionData> _allFunctionsByIndex;
private final Map<String, FunctionData> _allFunctionsByName;
private final Set<Integer> _groupFunctionIndexes;
private final Set<String> _groupFunctionNames;
private final PrintStream _ps;
public FunctionDataCollector(PrintStream ps) {
_ps = ps;
_allFunctionsByIndex = new HashMap<>();
_allFunctionsByName = new HashMap<>();
_groupFunctionIndexes = new HashSet<>();
_groupFunctionNames = new HashSet<>();
}
public void addFunction(int funcIx, boolean hasFootnote, String funcName, int minParams, int maxParams,
String returnClass, String paramClasses, String volatileFlagStr) {
boolean isVolatile = volatileFlagStr.length() > 0;
Integer funcIxKey = Integer.valueOf(funcIx);
if(!_groupFunctionIndexes.add(funcIxKey)) {
throw new RuntimeException("Duplicate function index (" + funcIx + ")");
}
if(!_groupFunctionNames.add(funcName)) {
throw new RuntimeException("Duplicate function name '" + funcName + "'");
}
checkRedefinedFunction(hasFootnote, funcName, funcIxKey);
FunctionData fd = new FunctionData(funcIx, hasFootnote, funcName,
minParams, maxParams, returnClass, paramClasses, isVolatile);
_allFunctionsByIndex.put(funcIxKey, fd);
_allFunctionsByName.put(funcName, fd);
}
/**
* Some extra validation here.
* Any function which changes definition will have a footnote in the source document
*/
private void checkRedefinedFunction(boolean hasNote, String funcName, Integer funcIxKey) {
FunctionData fdPrev;
// check by index
fdPrev = _allFunctionsByIndex.get(funcIxKey);
if(fdPrev != null) {
if(!fdPrev.hasFootnote() || !hasNote) {
throw new RuntimeException("changing function ["
+ funcIxKey + "] definition without foot-note");
}
_allFunctionsByName.remove(fdPrev.getName());
}
// check by name
fdPrev = _allFunctionsByName.get(funcName);
if(fdPrev != null) {
if(!fdPrev.hasFootnote() || !hasNote) {
throw new RuntimeException("changing function '"
+ funcName + "' definition without foot-note");
}
_allFunctionsByIndex.remove(Integer.valueOf(fdPrev.getIndex()));
}
}
public void endTableGroup(String headingText) {
Integer[] keys = new Integer[_groupFunctionIndexes.size()];
_groupFunctionIndexes.toArray(keys);
_groupFunctionIndexes.clear();
_groupFunctionNames.clear();
Arrays.sort(keys);
_ps.println("# " + headingText);
for (Integer key : keys) {
FunctionData fd = _allFunctionsByIndex.get(key);
_ps.println(fd.formatAsDataLine());
}
}
}
private static final Pattern ID_MATCH = Pattern.compile("0x([\\dA-F]+)");
private static final Pattern NAME_MATCH = Pattern.compile("([0-9A-Z.]+)");
private static final Pattern ID_NAME_MATCH = Pattern.compile("0x([\\dA-F]+)\\s+([0-9A-Z.]+)");
private static final Set<String> IGNORED_LINES = new HashSet<>();
static {
IGNORED_LINES.add("[MS-XLS] — v20141018");
IGNORED_LINES.add("Excel Binary File Format (.xls) Structure");
IGNORED_LINES.add("Copyright © 2014 Microsoft Corporation.");
IGNORED_LINES.add("Release: October 30, 2014Value Meaning");
IGNORED_LINES.add("Release: October 30, 2014Value");
IGNORED_LINES.add("Meaning");
}
private static void extractFunctionData(FunctionDataCollector fdc, InputStream is) throws IOException {
try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
String id = null;
String name = null;
while (true) {
String line = reader.readLine();
if(line == null) {
break;
}
if(IGNORED_LINES.contains(line) || line.matches("\\d+ / \\d+")) {
continue;
}
Matcher idMatcher = ID_MATCH.matcher(line);
boolean foundID = idMatcher.matches();
Matcher nameMatcher = NAME_MATCH.matcher(line);
boolean foundName = nameMatcher.matches();
Matcher idAndNameMatcher = ID_NAME_MATCH.matcher(line);
boolean foundIDandName = idAndNameMatcher.matches();
if(foundID && foundName ||
foundName && foundIDandName ||
foundID && foundIDandName) {
throw new IllegalStateException("Should not find id and name: " +
foundID + "/" + foundName + "/" + foundIDandName +
", line: " + line);
}
if(foundID && id != null) {
throw new IllegalStateException("Found ID, but already had one: " + id + ", line: " + line);
}
if(foundName && name != null) {
throw new IllegalStateException("Found name, but already had one: " + name + ", line: " + line);
}
if(foundIDandName && (name != null || id != null)) {
throw new IllegalStateException("Found name and id, but already had one: id: " + id + ", name: " + name + ", line: " + line);
}
if(foundID) {
id = idMatcher.group(1);
} else if (foundName) {
name = nameMatcher.group(1);
} else if (foundIDandName) {
id = idAndNameMatcher.group(1);
name = idAndNameMatcher.group(2);
// manual override for one function name which contains lowercase characters
} else if(line.equals("VBAActivate")) {
name = line;
} else if (id == null || name == null) {
throw new IllegalStateException("Found params, but had empty id or name, id: " + id +
", name: " + name + ", line: " + line);
} else {
System.out.println("Found function " + id + " " + name + " " + line);
fdc.addFunction(Integer.parseInt(id, 16), false, name, 0, 0,
"", "", "");
id = null;
name = null;
}
}
}
fdc.endTableGroup("");
}
/**
* To be sure that no tricky unicode chars make it through to the output file.
*/
private static final class SimpleAsciiOutputStream extends OutputStream {
private final OutputStream _os;
public SimpleAsciiOutputStream(OutputStream os) {
_os = os;
}
@Override
public void write(int b) throws IOException {
checkByte(b);
_os.write(b);
}
private static void checkByte(int b) {
if (!isSimpleAscii((char)b)) {
throw new RuntimeException("Encountered char (" + b + ") which was not simple ascii as expected");
}
}
@Override
public void write(byte[] b, int off, int len) throws IOException {
for (int i = 0; i < len; i++) {
checkByte(b[i + off]);
}
_os.write(b, off, len);
}
}
private static void processFile(InputStream input, File outFile) throws IOException {
try (OutputStream os = new SimpleAsciiOutputStream(new FileOutputStream(outFile));
PrintStream ps = new PrintStream(os, true, "UTF-8")) {
outputLicenseHeader(ps);
Class<?> genClass = ExcelCetabFunctionExtractor.class;
ps.println("# Created by (" + genClass.getName() + ")");
// identify the source file
ps.println("# from source file '" + SOURCE_DOC_FILE_NAME + "'");
ps.println("#");
ps.println("#Columns: (index, name, minParams, maxParams, returnClass, paramClasses, isVolatile, hasFootnote )");
ps.println();
extractFunctionData(new FunctionDataCollector(ps), input);
ps.close();
String canonicalOutputFileName = outFile.getCanonicalPath();
System.out.println("Successfully output to '" + canonicalOutputFileName + "'");
}
}
private static void outputLicenseHeader(PrintStream ps) {
String[] lines= {
"Licensed to the Apache Software Foundation (ASF) under one or more",
"contributor license agreements. See the NOTICE file distributed with",
"this work for additional information regarding copyright ownership.",
"The ASF licenses this file to You under the Apache License, Version 2.0",
"(the \"License\"); you may not use this file except in compliance with",
"the License. You may obtain a copy of the License at",
"",
" http://www.apache.org/licenses/LICENSE-2.0",
"",
"Unless required by applicable law or agreed to in writing, software",
"distributed under the License is distributed on an \"AS IS\" BASIS,",
"WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.",
"See the License for the specific language governing permissions and",
"limitations under the License.",
};
for (String line : lines) {
ps.print("# ");
ps.println(line);
}
ps.println();
}
public static void main(String[] args) throws IOException {
if(!new File(SOURCE_DOC_FILE_NAME).exists()) {
throw new IllegalStateException("Did not find file " + SOURCE_DOC_FILE_NAME + " in the resources");
}
try (InputStream stream = new FileInputStream(SOURCE_DOC_FILE_NAME)) {
File outFile = new File("functionMetadataCetab.txt");
processFile(stream, outFile);
}
}
}

Binary file not shown.