diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 561ab0d4d2..4ed05d00f1 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -36,7 +36,7 @@ - + 45018 - Support for fetching embeded documents from within an OOXML file Port support for setting a policy on missing / blank cells when fetching, to XSSF too Common text extraction factory, which returns the correct POITextExtractor for the supplied data @@ -45,8 +45,15 @@ Created a common interface for handling PowerPoint files, irrespective of if they are .ppt or .pptx Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx - - + + 45338 - Fix HSSFWorkbook to give you the same HSSFFont every time, and then fix it to find newly added fonts + 45336 - Fix HSSFColor.getTripletHash() + 45334 - Fixed formula parser to handle dots in identifiers + 45252 - Improvement for HWPF Range.replaceText() + 45001 - Further fix for HWPF Range.delete() and unicode characters + 45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation + Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records. + Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder 30978 - Fixed re-serialization of tRefErr3d and tAreaErr3d diff --git a/src/documentation/content/xdocs/faq.xml b/src/documentation/content/xdocs/faq.xml index e74b6f0901..22c1a4cb4d 100644 --- a/src/documentation/content/xdocs/faq.xml +++ b/src/documentation/content/xdocs/faq.xml @@ -20,6 +20,36 @@ + + + My code uses some new HSSF feature, compiles fine but fails when live with a "MethodNotFoundException" + + +

You almost certainly have an older version of POI earlier + on your classpath. Quite a few runtimes and other packages + will ship an older version of POI, so this is an easy problem + to hit without realising.

+

The best way to identify the offending earlier jar file is + with a few lines of Java. These will load one of the core POI + classes, and report where it came from.

+ +ClassLoader classloader = org.apache.poi.poifs.filesystem.POIFSFileSystem.class.getClassLoader(); +URL res = classloader.getResource("org/apache/poi/poifs/filesystem/POIFSFileSystem.class"); +String path = res.getPath(); +System.out.println("Core POI came from " + path); + +
+
+ + + My code uses the scratchpad, compiles fine but fails to run with a "MethodNotFoundException" + + +

You almost certainly have an older version earlier on your + classpath. See the answer to the similar question above for + how to track this down.

+
+
Why is reading a simple sheet taking so long? diff --git a/src/documentation/content/xdocs/index.xml b/src/documentation/content/xdocs/index.xml index b881cc0a2a..b632fb93ff 100644 --- a/src/documentation/content/xdocs/index.xml +++ b/src/documentation/content/xdocs/index.xml @@ -31,35 +31,24 @@ -
Office Open XML Support +
POI 3.5.1 beta 1, and Office Open XML Support (2008-07-11)

We are currently working to support the new Office Open XML file formats, such as XLSX and PPTX, which were introduced in Office 2007.

-

Support for these is currently only available in an svn branch, - but we hope to have a full release including it by the summer. - People interested should follow the +

Development for this is in an svn branch, but we are pleased to + announce our first preview release containing this support. + Users interested in the OOXML support should download the + POI 3.5.1 beta 1 + the source and binaries from their + local mirror. + People interested should also follow the dev list to track progress.

-
POI 3.1-BETA2 Released (2008-05-28) +
POI 3.1-FINAL Released (2008-06-29)

- The POI team is pleased to announce the release of 3.1 BETA2 which is one of the final steps before 3.1 FINAL. - The status of this release is a beta, meaning that we encourage users to try it out. - If you find any bugs, please report them to the POI bug database or to - the POI Developer List. -

A full list of changes is available in - the changelog, and - download - the source and binaries from your - local mirror. -

-

- The release is also available from the central Maven repository - under Group ID "org.apache.poi" and Version "3.1-beta2". -

-
-
POI 3.0.2 Released -

The POI team is pleased to announce POI 3.0.2, the latest release of Apache POI. - There have been many important bug fixes since the 3.0.1 release and a lot of new features. A full list of changes is available in + The POI team is pleased to announce the release of 3.1 FINAL, the latest release of Apache POI. + There have been many important bug fixes since the 3.0.2 release and a lot of new features. +

A full list of changes is available in the changelog, and download the source and binaries from your @@ -67,13 +56,8 @@

The release is also available from the central Maven repository - under Group ID "org.apache.poi" and Version "3.0.2-FINAL". + under Group ID "org.apache.poi" and Version "3.1-FINAL".

-

We would also like to confirm that verions 3.0 and 3.0.1 of Apache - POI do - not contain any viruses. Users of broken virus checkers - which do detect a 94 byte file, sci_cec.db, as containing one are - advised to contact their vendor for a fix.

Purpose diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index ca47e0f209..f96dfc33ab 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -33,7 +33,7 @@ - + 45018 - Support for fetching embeded documents from within an OOXML file Port support for setting a policy on missing / blank cells when fetching, to XSSF too Common text extraction factory, which returns the correct POITextExtractor for the supplied data @@ -42,8 +42,15 @@ Created a common interface for handling PowerPoint files, irrespective of if they are .ppt or .pptx Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx - - + + 45338 - Fix HSSFWorkbook to give you the same HSSFFont every time, and then fix it to find newly added fonts + 45336 - Fix HSSFColor.getTripletHash() + 45334 - Fixed formula parser to handle dots in identifiers + 45252 - Improvement for HWPF Range.replaceText() + 45001 - Further fix for HWPF Range.delete() and unicode characters + 45175 - Support for variable length operands in org.apache.poi.hwpf.sprm.SprmOperation + Avoid spurious missing lines with the MissingRecordAware event code, and odd files that contain RowRecords in the middle of the cell Records. 
+ Support for parsing formulas during EventUserModel processing, via the new EventWorkbookBuilder 30978 - Fixed re-serialization of tRefErr3d and tAreaErr3d diff --git a/src/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java b/src/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java index 9bebd3a837..1c9b220356 100644 --- a/src/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java +++ b/src/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java @@ -30,9 +30,11 @@ import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; import org.apache.poi.hssf.eventusermodel.HSSFListener; import org.apache.poi.hssf.eventusermodel.HSSFRequest; import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener; +import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener; import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord; import org.apache.poi.hssf.model.FormulaParser; +import org.apache.poi.hssf.record.BOFRecord; import org.apache.poi.hssf.record.BlankRecord; import org.apache.poi.hssf.record.BoolErrRecord; import org.apache.poi.hssf.record.CellValueRecordInterface; @@ -46,6 +48,7 @@ import org.apache.poi.hssf.record.Record; import org.apache.poi.hssf.record.SSTRecord; import org.apache.poi.hssf.record.StringRecord; import org.apache.poi.hssf.usermodel.HSSFDateUtil; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** @@ -64,6 +67,10 @@ public class XLS2CSVmra implements HSSFListener { /** Should we output the formula, or the value it has? 
*/ private boolean outputFormulaValues = true; + /** For parsing Formulas */ + private SheetRecordCollectingListener workbookBuildingListener; + private HSSFWorkbook stubWorkbook; + // Records we pick up as we process private SSTRecord sstRecord; private FormatTrackingHSSFListener formatListener; @@ -108,7 +115,13 @@ public class XLS2CSVmra implements HSSFListener { HSSFEventFactory factory = new HSSFEventFactory(); HSSFRequest request = new HSSFRequest(); - request.addListenerForAllRecords(formatListener); + + if(outputFormulaValues) { + request.addListenerForAllRecords(formatListener); + } else { + workbookBuildingListener = new SheetRecordCollectingListener(formatListener); + request.addListenerForAllRecords(workbookBuildingListener); + } factory.processWorkbookEvents(request, fs); } @@ -124,6 +137,16 @@ public class XLS2CSVmra implements HSSFListener { switch (record.getSid()) { + case BOFRecord.sid: + BOFRecord br = (BOFRecord)record; + if(br.getType() == BOFRecord.TYPE_WORKSHEET) { + // Create sub workbook if required + if(workbookBuildingListener != null && stubWorkbook == null) { + stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook(); + } + } + break; + case SSTRecord.sid: sstRecord = (SSTRecord) record; break; @@ -161,7 +184,7 @@ public class XLS2CSVmra implements HSSFListener { } } else { thisStr = '"' + - FormulaParser.toFormulaString(null, frec.getParsedExpression()) + '"'; + FormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"'; } break; case StringRecord.sid: diff --git a/src/java/org/apache/poi/hssf/eventusermodel/EventWorkbookBuilder.java b/src/java/org/apache/poi/hssf/eventusermodel/EventWorkbookBuilder.java new file mode 100644 index 0000000000..0ae5f3f260 --- /dev/null +++ b/src/java/org/apache/poi/hssf/eventusermodel/EventWorkbookBuilder.java @@ -0,0 +1,199 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor 
license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hssf.eventusermodel; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.poi.hssf.model.FormulaParser; +import org.apache.poi.hssf.model.Workbook; +import org.apache.poi.hssf.record.BoundSheetRecord; +import org.apache.poi.hssf.record.EOFRecord; +import org.apache.poi.hssf.record.ExternSheetRecord; +import org.apache.poi.hssf.record.Record; +import org.apache.poi.hssf.record.SSTRecord; +import org.apache.poi.hssf.record.SupBookRecord; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; + +/** + * When working with the EventUserModel, if you want to + * process formulas, you need an instance of + * {@link Workbook} to pass to a {@link HSSFWorkbook}, + * to finally give to {@link FormulaParser}, + * and this will build you stub ones. + * Since you're working with the EventUserModel, you + * wouldn't want to get a full {@link Workbook} and + * {@link HSSFWorkbook}, as they would eat too much memory. + * Instead, you should collect a few key records as they + * go past, then call this once you have them to build a + * stub {@link Workbook}, and from that a stub + * {@link HSSFWorkbook}, to use with the {@link FormulaParser}. 
+ * + * The records you should collect are: + * * {@link ExternSheetRecord} + * * {@link BoundSheetRecord} + * You should probably also collect {@link SSTRecord}, + * but it's not required to pass this in. + * + * To help, this class includes a HSSFListener wrapper + * that will do the collecting for you. + */ +public class EventWorkbookBuilder { + /** + * Wraps up your stub {@link Workbook} as a stub + * {@link HSSFWorkbook}, ready for passing to + * {@link FormulaParser} + * @param workbook A stub {@link Workbook} + */ + public static HSSFWorkbook createStubHSSFWorkbook(Workbook workbook) { + return new StubHSSFWorkbook(workbook); + } + + /** + * Creates a stub Workbook from the supplied records, + * suitable for use with the {@link FormulaParser} + * @param externs The ExternSheetRecords in your file + * @param bounds The BoundSheetRecords in your file + * @param sst The SSTRecord in your file. + * @return A stub Workbook suitable for use with {@link FormulaParser} + */ + public static Workbook createStubWorkbook(ExternSheetRecord[] externs, + BoundSheetRecord[] bounds, SSTRecord sst) { + List wbRecords = new ArrayList(); + + // Core Workbook records go first + if(bounds != null) { + for(int i=0; i -1) { + for(int i=lastCellRow; i + * Note - identifiers in Excel can contain dots, so this method may return a String + * which may need to be converted to an area reference. 
For example, this method + * may return a value like "A1..B2", in which case the caller must convert it to + * an area reference like "A1:B2" + */ + private String parseIdentifier() { StringBuffer Token = new StringBuffer(); if (!IsAlpha(look) && look != '\'') { throw expected("Name"); @@ -201,7 +209,9 @@ public final class FormulaParser { } else { - while (IsAlNum(look)) { + // allow for any sequence of dots and identifier chars + // special case of two consecutive dots is best treated in the calling code + while (IsAlNum(look) || look == '.') { Token.append(look); GetChar(); } @@ -220,15 +230,22 @@ public final class FormulaParser { return value.length() == 0 ? null : value.toString(); } - private ParseNode parseFunctionOrIdentifier() { - String name = GetName(); + private ParseNode parseFunctionReferenceOrName() { + String name = parseIdentifier(); if (look == '('){ //This is a function return function(name); } - return new ParseNode(parseIdentifier(name)); + return new ParseNode(parseNameOrReference(name)); } - private Ptg parseIdentifier(String name) { + + private Ptg parseNameOrReference(String name) { + + AreaReference areaRef = parseArea(name); + if (areaRef != null) { + // will happen if dots are used instead of colon + return new AreaPtg(areaRef.formatAsString()); + } if (look == ':' || look == '.') { // this is a AreaReference GetChar(); @@ -238,23 +255,28 @@ public final class FormulaParser { } String first = name; - String second = GetName(); + String second = parseIdentifier(); return new AreaPtg(first+":"+second); } if (look == '!') { Match('!'); String sheetName = name; - String first = GetName(); + String first = parseIdentifier(); short externIdx = (short)book.getExternalSheetIndex(book.getSheetIndex(sheetName)); + areaRef = parseArea(name); + if (areaRef != null) { + // will happen if dots are used instead of colon + return new Area3DPtg(areaRef.formatAsString(), externIdx); + } if (look == ':') { Match(':'); - String second=GetName(); + String 
second=parseIdentifier(); if (look == '!') { //The sheet name was included in both of the areas. Only really //need it once Match('!'); - String third=GetName(); + String third=parseIdentifier(); if (!sheetName.equals(second)) throw new RuntimeException("Unhandled double sheet reference."); @@ -271,9 +293,7 @@ public final class FormulaParser { // This can be either a cell ref or a named range // Try to spot which it is - boolean cellRef = CELL_REFERENCE_PATTERN.matcher(name).matches(); - - if (cellRef) { + if (isValidCellReference(name)) { return new RefPtg(name); } @@ -287,6 +307,41 @@ public final class FormulaParser { + name + "\", but that named range wasn't defined!"); } + /** + * @return null if name cannot be split at a dot + */ + private AreaReference parseArea(String name) { + int dotPos = name.indexOf('.'); + if (dotPos < 0) { + return null; + } + int dotCount = 1; + while (dotCount3) { + // four or more consecutive dots does not convert to ':' + return null; + } + } + String partA = name.substring(0, dotPos); + if (!isValidCellReference(partA)) { + return null; + } + String partB = name.substring(dotPos+dotCount); + if (!isValidCellReference(partB)) { + return null; + } + CellReference topLeft = new CellReference(partA); + CellReference bottomRight = new CellReference(partB); + return new AreaReference(topLeft, bottomRight); + } + + private static boolean isValidCellReference(String str) { + // TODO - exact rules for recognising cell references may be too complicated for regex + return CELL_REFERENCE_PATTERN.matcher(str).matches(); + } + + /** * Note - Excel function names are 'case aware but not case sensitive'. 
This method may end * up creating a defined name record in the workbook if the specified name is not an internal @@ -465,7 +520,7 @@ public final class FormulaParser { return new ParseNode(parseStringLiteral()); } if (IsAlpha(look) || look == '\''){ - return parseFunctionOrIdentifier(); + return parseFunctionReferenceOrName(); } // else - assume number return new ParseNode(parseNumber()); @@ -510,7 +565,7 @@ public final class FormulaParser { private ErrPtg parseErrorLiteral() { Match('#'); - String part1 = GetName().toUpperCase(); + String part1 = parseIdentifier().toUpperCase(); switch(part1.charAt(0)) { case 'V': diff --git a/src/java/org/apache/poi/hssf/record/formula/functions/Errortype.java b/src/java/org/apache/poi/hssf/record/formula/functions/Errortype.java index 51268c9aa0..dd72eb5c79 100644 --- a/src/java/org/apache/poi/hssf/record/formula/functions/Errortype.java +++ b/src/java/org/apache/poi/hssf/record/formula/functions/Errortype.java @@ -1,25 +1,78 @@ -/* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ -/* - * Created on May 15, 2005 - * - */ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. 
See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + package org.apache.poi.hssf.record.formula.functions; -public class Errortype extends NotImplementedFunction { +import org.apache.poi.hssf.record.formula.eval.ErrorEval; +import org.apache.poi.hssf.record.formula.eval.Eval; +import org.apache.poi.hssf.record.formula.eval.EvaluationException; +import org.apache.poi.hssf.record.formula.eval.NumberEval; +import org.apache.poi.hssf.record.formula.eval.OperandResolver; +import org.apache.poi.hssf.usermodel.HSSFErrorConstants; + +/** + * Implementation for the ERROR.TYPE() Excel function.

+ * + * Syntax:
+ * ERROR.TYPE(errorValue)

+ * + * Returns a number corresponding to the error type of the supplied argument.

+ * + * + * + * + * + * + * + * + * + * + * + *
errorValueReturn Value
#NULL!1
#DIV/0!2
#VALUE!3
#REF!4
#NAME?5
#NUM!6
#N/A7
everything else#N/A
+ * + * Note - the results of ERROR.TYPE() are different to the constants defined in + * HSSFErrorConstants. + * + * @author Josh Micich + */ +public final class Errortype implements Function { + + public Eval evaluate(Eval[] args, int srcCellRow, short srcCellCol) { + + try { + OperandResolver.getSingleValue(args[0], srcCellRow, srcCellCol); + return ErrorEval.NA; + } catch (EvaluationException e) { + int result = translateErrorCodeToErrorTypeValue(e.getErrorEval().getErrorCode()); + return new NumberEval(result); + } + } + + private int translateErrorCodeToErrorTypeValue(int errorCode) { + switch (errorCode) { + case HSSFErrorConstants.ERROR_NULL: return 1; + case HSSFErrorConstants.ERROR_DIV_0: return 2; + case HSSFErrorConstants.ERROR_VALUE: return 3; + case HSSFErrorConstants.ERROR_REF: return 4; + case HSSFErrorConstants.ERROR_NAME: return 5; + case HSSFErrorConstants.ERROR_NUM: return 6; + case HSSFErrorConstants.ERROR_NA : return 7; + } + throw new IllegalArgumentException("Invalid error code (" + errorCode + ")"); + } } diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java index 3aac4b5992..b80ccb7905 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java @@ -24,6 +24,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.PrintWriter; import java.util.ArrayList; +import java.util.Hashtable; import java.util.Iterator; import java.util.List; import java.util.Stack; @@ -105,6 +106,12 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm */ private ArrayList names; + + /** + * this holds the HSSFFont objects attached to this workbook. + * We only create these from the low level records as required. + */ + private Hashtable fonts; /** * holds whether or not to preserve other nodes in the POIFS. 
Used @@ -1021,9 +1028,10 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm if(fontindex == Short.MAX_VALUE){ throw new IllegalArgumentException("Maximum number of fonts was exceeded"); } - HSSFFont retval = new HSSFFont(fontindex, font); - - return retval; + + // Ask getFontAt() to build it for us, + // so it gets properly cached + return getFontAt(fontindex); } /** @@ -1033,15 +1041,11 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm String name, boolean italic, boolean strikeout, short typeOffset, byte underline) { -// System.out.println( boldWeight + ", " + color + ", " + fontHeight + ", " + name + ", " + italic + ", " + strikeout + ", " + typeOffset + ", " + underline ); - for (short i = 0; i < workbook.getNumberOfFontRecords(); i++) - { - if (i == 4) - continue; - - FontRecord font = workbook.getFontRecordAt(i); - HSSFFont hssfFont = new HSSFFont(i, font); -// System.out.println( hssfFont.getBoldweight() + ", " + hssfFont.getColor() + ", " + hssfFont.getFontHeight() + ", " + hssfFont.getFontName() + ", " + hssfFont.getItalic() + ", " + hssfFont.getStrikeout() + ", " + hssfFont.getTypeOffset() + ", " + hssfFont.getUnderline() ); + for (short i=0; i<=getNumberOfFonts(); i++) { + // Remember - there is no 4! 
+ if(i == 4) continue; + + HSSFFont hssfFont = getFontAt(i); if (hssfFont.getBoldweight() == boldWeight && hssfFont.getColor() == color && hssfFont.getFontHeight() == fontHeight @@ -1051,12 +1055,10 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm && hssfFont.getTypeOffset() == typeOffset && hssfFont.getUnderline() == underline) { -// System.out.println( "Found font" ); return hssfFont; } } -// System.out.println( "No font found" ); return null; } @@ -1071,15 +1073,26 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm } /** - * get the font at the given index number + * Get the font at the given index number * @param idx index number * @return HSSFFont at the index */ public HSSFFont getFontAt(short idx) { + if(fonts == null) fonts = new Hashtable(); + + // So we don't confuse users, give them back + // the same object every time, but create + // them lazily + Short sIdx = Short.valueOf(idx); + if(fonts.containsKey(sIdx)) { + return (HSSFFont)fonts.get(sIdx); + } + FontRecord font = workbook.getFontRecordAt(idx); HSSFFont retval = new HSSFFont(idx, font); + fonts.put(sIdx, retval); return retval; } diff --git a/src/java/org/apache/poi/hssf/util/HSSFColor.java b/src/java/org/apache/poi/hssf/util/HSSFColor.java index 2c51b3d208..d13baecf80 100644 --- a/src/java/org/apache/poi/hssf/util/HSSFColor.java +++ b/src/java/org/apache/poi/hssf/util/HSSFColor.java @@ -155,8 +155,12 @@ public class HSSFColor implements Color { String hexString = color.getHexString(); if (result.containsKey(hexString)) { - throw new RuntimeException("Dup color hexString (" + hexString - + ") for color (" + color.getClass().getName() + ")"); + HSSFColor other = (HSSFColor)result.get(hexString); + throw new RuntimeException( + "Dup color hexString (" + hexString + + ") for color (" + color.getClass().getName() + ") - " + + " already taken by (" + other.getClass().getName() + ")" + ); } result.put(hexString, color); } @@ -1511,9 
+1515,9 @@ public class HSSFColor implements Color { public final static short index = 0x19; public final static short[] triplet = { - 153, 51, 102 + 127, 0, 0 }; - public final static String hexString = "9999:3333:6666"; + public final static String hexString = "8000:0:0"; public short getIndex() { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java index bc33954dff..227200ab5d 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java @@ -91,15 +91,18 @@ public class TextPiece extends PropertyNode implements Comparable public void adjustForDelete(int start, int length) { + // length is expected to be the number of code-points, + // not the number of characters + int numChars = length; if (usesUnicode()) { start /= 2; - length /= 2; + numChars = (length / 2); } int myStart = getStart(); int myEnd = getEnd(); - int end = start + length; + int end = start + numChars; /* do we have to delete from this text piece? */ if (start <= myEnd && end >= myStart) { @@ -108,9 +111,14 @@ public class TextPiece extends PropertyNode implements Comparable int overlapStart = Math.max(myStart, start); int overlapEnd = Math.min(myEnd, end); ((StringBuffer)_buf).delete(overlapStart, overlapEnd); - - super.adjustForDelete(start, length); } + + // We need to invoke this even if text from this piece is not being + // deleted because the adjustment must propagate to all subsequent + // text pieces i.e., if text from tp[n] is being deleted, then + // tp[n + 1], tp[n + 2], etc. will need to be adjusted. + // The superclass is expected to use a separate sentry for this. 
+ super.adjustForDelete(start, length); } public int characterLength() diff --git a/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmOperation.java b/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmOperation.java index cacbbaaa38..764fc06c4d 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmOperation.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmOperation.java @@ -101,7 +101,14 @@ public class SprmOperation case 3: return LittleEndian.getInt(_grpprl, _gOffset); case 6: - throw new UnsupportedOperationException("This SPRM contains a variable length operand"); + byte operandLength = _grpprl[_gOffset + 1]; //surely shorter than an int... + + byte [] codeBytes = new byte[LittleEndian.INT_SIZE]; //initialized to zeros by JVM + for(int i = 0; i < operandLength; i++) + if(_gOffset + i < _grpprl.length) + codeBytes[i] = _grpprl[_gOffset + 1 + i]; + + return LittleEndian.getInt(codeBytes, 0); case 7: byte threeByteInt[] = new byte[4]; threeByteInt[0] = _grpprl[_gOffset]; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java index 80e9b7526c..0ef944f136 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java @@ -333,7 +333,7 @@ public class Range _doc.getCharacterTable().adjustForInsert(_charStart, adjustedLength); _doc.getParagraphTable().adjustForInsert(_parStart, adjustedLength); _doc.getSectionTable().adjustForInsert(_sectionStart, adjustedLength); - adjustForInsert(text.length()); + adjustForInsert(adjustedLength); // update the FIB.CCPText field adjustFIB(text.length()); @@ -656,8 +656,15 @@ public class Range ); } + // this Range isn't a proper parent of the subRange() so we'll have to keep + // track of an updated endOffset on our own + int previousEndOffset = subRange.getEndOffset(); + subRange.insertBefore(pValue); + if (subRange.getEndOffset() != previousEndOffset) + _end += 
(subRange.getEndOffset() - previousEndOffset); + // re-create the sub-range so we can delete it subRange = new Range( (absPlaceHolderIndex + pValue.length()), @@ -671,9 +678,30 @@ public class Range (pValue.length() * 2)), getDocument() ); + // deletes are automagically propagated subRange.delete(); } + /** + * Replace (all instances of) a piece of text with another... + * + * @param pPlaceHolder The text to be replaced (e.g., "${organization}") + * @param pValue The replacement text (e.g., "Apache Software Foundation") + */ + public void replaceText(String pPlaceHolder, String pValue) + { + boolean keepLooking = true; + while (keepLooking) { + + String text = text(); + int offset = text.indexOf(pPlaceHolder); + if (offset >= 0) + replaceText(pPlaceHolder, pValue, offset); + else + keepLooking = false; + } + } + /** * Gets the character run at index. The index is relative to this range. * @@ -915,7 +943,7 @@ public class Range /** * adjust this range after an insert happens. - * @param length the length to adjust for + * @param length the length to adjust for (expected to be a count of code-points, not necessarily chars) */ private void adjustForInsert(int length) { diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc new file mode 100644 index 0000000000..896108397c Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/testRangeDelete.doc differ diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java new file mode 100644 index 0000000000..1becc234c3 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java @@ -0,0 +1,196 @@ + +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license 
agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hwpf.usermodel; + +import java.io.ByteArrayOutputStream; +import java.io.FileInputStream; +import java.util.List; + +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.model.PicturesTable; +import org.apache.poi.hwpf.usermodel.Picture; + +import junit.framework.TestCase; + +/** + * Test to see if Range.delete() works even if the Range contains a + * CharacterRun that uses Unicode characters. + */ +public class TestRangeDelete extends TestCase { + + // u201c and u201d are "smart-quotes" + private String originalText = + "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r"; + private String searchText = "${delete}"; + private String expectedText1 = " This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 
2.2.1).\r"; + private String expectedText2 = + "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} and all the POI contributors for their assistance in this matter.\r"; + private String expectedText3 = "Thank you, ${organization} !\r"; + + private String illustrativeDocFile; + + protected void setUp() throws Exception { + + String dirname = System.getProperty("HWPF.testdata.path"); + + illustrativeDocFile = dirname + "/testRangeDelete.doc"; + } + + /** + * Test just opening the files + */ + public void testOpen() throws Exception { + + HWPFDocument docA = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + } + + /** + * Test (more "confirm" than test) that we have the general structure that we expect to have. + */ + public void testDocStructure() throws Exception { + + HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + + Range range = daDoc.getRange(); + + assertEquals(1, range.numSections()); + Section section = range.getSection(0); + + assertEquals(5, section.numParagraphs()); + Paragraph para = section.getParagraph(2); + + assertEquals(5, para.numCharacterRuns()); + + assertEquals(originalText, para.text()); + } + + /** + * Test that we can delete text (one instance) from our Range with Unicode text. 
+ */ + public void testRangeDeleteOne() throws Exception { + + HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + + Range range = daDoc.getRange(); + assertEquals(1, range.numSections()); + + Section section = range.getSection(0); + assertEquals(5, section.numParagraphs()); + + Paragraph para = section.getParagraph(2); + + String text = para.text(); + assertEquals(originalText, text); + + int offset = text.indexOf(searchText); + assertEquals(192, offset); + + int absOffset = para.getStartOffset() + offset; + if (para.usesUnicode()) + absOffset = para.getStartOffset() + (offset * 2); + + Range subRange = new Range(absOffset, (absOffset + searchText.length()), para.getDocument()); + if (subRange.usesUnicode()) + subRange = new Range(absOffset, (absOffset + (searchText.length() * 2)), para.getDocument()); + + assertEquals(searchText, subRange.text()); + + subRange.delete(); + + // we need to let the model re-calculate the Range before we evaluate it + range = daDoc.getRange(); + + assertEquals(1, range.numSections()); + section = range.getSection(0); + + assertEquals(5, section.numParagraphs()); + para = section.getParagraph(2); + + text = para.text(); + assertEquals(expectedText2, text); + + // this can lead to a StringBufferOutOfBoundsException, so we will add it + // even though we don't have an assertion for it + Range daRange = daDoc.getRange(); + daRange.text(); + } + + /** + * Test that we can delete text (all instances of) from our Range with Unicode text. 
+ */ + public void testRangeDeleteAll() throws Exception { + + HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + + Range range = daDoc.getRange(); + assertEquals(1, range.numSections()); + + Section section = range.getSection(0); + assertEquals(5, section.numParagraphs()); + + Paragraph para = section.getParagraph(2); + + String text = para.text(); + assertEquals(originalText, text); + + boolean keepLooking = true; + while (keepLooking) { + + int offset = range.text().indexOf(searchText); + if (offset >= 0) { + + int absOffset = range.getStartOffset() + offset; + if (range.usesUnicode()) + absOffset = range.getStartOffset() + (offset * 2); + + Range subRange = new Range( + absOffset, (absOffset + searchText.length()), range.getDocument()); + if (subRange.usesUnicode()) + subRange = new Range( + absOffset, (absOffset + (searchText.length() * 2)), range.getDocument()); + + assertEquals(searchText, subRange.text()); + + subRange.delete(); + + } else + keepLooking = false; + } + + // we need to let the model re-calculate the Range before we use it + range = daDoc.getRange(); + + assertEquals(1, range.numSections()); + section = range.getSection(0); + + assertEquals(5, section.numParagraphs()); + + para = section.getParagraph(1); + text = para.text(); + assertEquals(expectedText1, text); + + para = section.getParagraph(2); + text = para.text(); + assertEquals(expectedText2, text); + + para = section.getParagraph(3); + text = para.text(); + assertEquals(expectedText3, text); + } +} diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java index 4b2b9ce370..bda615e943 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java @@ -39,8 +39,9 @@ public class TestRangeReplacement extends TestCase { "It is 
used to confirm that text replacement works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} and all the POI contributors for their assistance in this matter.\r"; private String searchText = "${organization}"; private String replacementText = "Apache Software Foundation"; - private String expectedText = + private String expectedText2 = "It is used to confirm that text replacement works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the Apache Software Foundation and all the POI contributors for their assistance in this matter.\r"; + private String expectedText3 = "Thank you, Apache Software Foundation!\r"; private String illustrativeDocFile; @@ -84,7 +85,7 @@ public class TestRangeReplacement extends TestCase { /** * Test that we can replace text in our Range with Unicode text. */ - public void testRangeReplacement() throws Exception { + public void testRangeReplacementOne() throws Exception { HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); @@ -104,16 +105,46 @@ public class TestRangeReplacement extends TestCase { para.replaceText(searchText, replacementText, offset); - // we need to let the model re-calculate the Range before we evaluate it - range = daDoc.getRange(); - assertEquals(1, range.numSections()); section = range.getSection(0); - assertEquals(5, section.numParagraphs()); + assertEquals(4, section.numParagraphs()); para = section.getParagraph(2); text = para.text(); - assertEquals(expectedText, text); + assertEquals(expectedText2, text); + } + + /** + * Test that we can replace text in our Range with Unicode text. 
+ */ + public void testRangeReplacementAll() throws Exception { + + HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); + + Range range = daDoc.getRange(); + assertEquals(1, range.numSections()); + + Section section = range.getSection(0); + assertEquals(5, section.numParagraphs()); + + Paragraph para = section.getParagraph(2); + + String text = para.text(); + assertEquals(originalText, text); + + range.replaceText(searchText, replacementText); + + assertEquals(1, range.numSections()); + section = range.getSection(0); + assertEquals(5, section.numParagraphs()); + + para = section.getParagraph(2); + text = para.text(); + assertEquals(expectedText2, text); + + para = section.getParagraph(3); + text = para.text(); + assertEquals(expectedText3, text); } } diff --git a/src/testcases/org/apache/poi/hssf/data/3dFormulas.xls b/src/testcases/org/apache/poi/hssf/data/3dFormulas.xls new file mode 100644 index 0000000000..82519ed839 Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/3dFormulas.xls differ diff --git a/src/testcases/org/apache/poi/hssf/data/FormulaEvalTestData.xls b/src/testcases/org/apache/poi/hssf/data/FormulaEvalTestData.xls index ce94050789..7be92c5fa4 100644 Binary files a/src/testcases/org/apache/poi/hssf/data/FormulaEvalTestData.xls and b/src/testcases/org/apache/poi/hssf/data/FormulaEvalTestData.xls differ diff --git a/src/testcases/org/apache/poi/hssf/data/MRExtraLines.xls b/src/testcases/org/apache/poi/hssf/data/MRExtraLines.xls new file mode 100644 index 0000000000..e82e4f6f40 Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/MRExtraLines.xls differ diff --git a/src/testcases/org/apache/poi/hssf/eventusermodel/TestEventWorkbookBuilder.java b/src/testcases/org/apache/poi/hssf/eventusermodel/TestEventWorkbookBuilder.java new file mode 100644 index 0000000000..adf084331a --- /dev/null +++ b/src/testcases/org/apache/poi/hssf/eventusermodel/TestEventWorkbookBuilder.java @@ -0,0 +1,160 @@ +/* 
==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.hssf.eventusermodel; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import junit.framework.TestCase; + +import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener; +import org.apache.poi.hssf.model.FormulaParser; +import org.apache.poi.hssf.model.Workbook; +import org.apache.poi.hssf.record.FormulaRecord; +import org.apache.poi.hssf.record.Record; +import org.apache.poi.hssf.record.formula.Ref3DPtg; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.hssf.util.SheetReferences; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +/** + * Tests for {@link EventWorkbookBuilder} + */ +public final class TestEventWorkbookBuilder extends TestCase { + private MockHSSFListener mockListen; + private SheetRecordCollectingListener listener; + + public void setUp() { + HSSFRequest req = new HSSFRequest(); + mockListen = new MockHSSFListener(); + listener = new 
SheetRecordCollectingListener(mockListen); + req.addListenerForAllRecords(listener); + + HSSFEventFactory factory = new HSSFEventFactory(); + try { + InputStream is = HSSFTestDataSamples.openSampleFileStream("3dFormulas.xls"); + POIFSFileSystem fs = new POIFSFileSystem(is); + factory.processWorkbookEvents(req, fs); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public void testBasics() throws Exception { + assertNotNull(listener.getSSTRecord()); + assertNotNull(listener.getBoundSheetRecords()); + assertNotNull(listener.getExternSheetRecords()); + } + + public void testGetStubWorkbooks() throws Exception { + assertNotNull(listener.getStubWorkbook()); + assertNotNull(listener.getStubHSSFWorkbook()); + + assertNotNull(listener.getStubWorkbook().getSheetReferences()); + assertNotNull(listener.getStubHSSFWorkbook().getSheetReferences()); + } + + public void testContents() throws Exception { + assertEquals(2, listener.getSSTRecord().getNumStrings()); + assertEquals(3, listener.getBoundSheetRecords().length); + assertEquals(1, listener.getExternSheetRecords().length); + + assertEquals(3, listener.getStubWorkbook().getNumSheets()); + + SheetReferences ref = listener.getStubWorkbook().getSheetReferences(); + assertEquals("Sh3", ref.getSheetName(0)); + assertEquals("Sheet1", ref.getSheetName(1)); + assertEquals("S2", ref.getSheetName(2)); + } + + public void testFormulas() throws Exception { + FormulaRecord fr; + + // Check our formula records + assertEquals(6, mockListen._frecs.size()); + + Workbook stubWB = listener.getStubWorkbook(); + assertNotNull(stubWB); + HSSFWorkbook stubHSSF = listener.getStubHSSFWorkbook(); + assertNotNull(stubHSSF); + + // Check these stubs have the right stuff on them + assertEquals("Sheet1", stubWB.getSheetName(0)); + assertEquals("S2", stubWB.getSheetName(1)); + assertEquals("Sh3", stubWB.getSheetName(2)); + + // Check we can get the formula without breaking + for(int i=0; i 100); + } + public void openAlt() { + 
HSSFRequest req = new HSSFRequest(); + MockHSSFListener mockListen = new MockHSSFListener(); + MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(mockListen); + req.addListenerForAllRecords(listener); + + HSSFEventFactory factory = new HSSFEventFactory(); + try { + InputStream is = HSSFTestDataSamples.openSampleFileStream("MRExtraLines.xls"); + POIFSFileSystem fs = new POIFSFileSystem(is); + factory.processWorkbookEvents(req, fs); + } catch (IOException e) { + throw new RuntimeException(e); + } + + r = mockListen.getRecords(); + assertTrue(r.length > 100); } public void testMissingRowRecords() throws Exception { + openNormal(); // We have rows 0, 1, 2, 20 and 21 int row0 = -1; @@ -108,6 +127,7 @@ public final class TestMissingRecordAwareHSSFListener extends TestCase { } public void testEndOfRowRecords() throws Exception { + openNormal(); // Find the cell at 0,0 int cell00 = -1; @@ -194,7 +214,7 @@ public final class TestMissingRecordAwareHSSFListener extends TestCase { assertTrue(r[cell00+57] instanceof LastCellOfRowDummyRecord); // Check the numbers of the last seen columns - LastCellOfRowDummyRecord[] lrs = new LastCellOfRowDummyRecord[23]; + LastCellOfRowDummyRecord[] lrs = new LastCellOfRowDummyRecord[24]; int lrscount = 0; for(int i=0; i 0); + assertTrue(HSSFColor.YELLOW.index2 > 0); + } + + public void testContents() { + assertEquals(3, HSSFColor.YELLOW.triplet.length); + assertEquals(255, HSSFColor.YELLOW.triplet[0]); + assertEquals(255, HSSFColor.YELLOW.triplet[1]); + assertEquals(0, HSSFColor.YELLOW.triplet[2]); + + assertEquals("FFFF:FFFF:0", HSSFColor.YELLOW.hexString); + } + + public void testTrippletHash() { + Hashtable tripplets = HSSFColor.getTripletHash(); + + assertEquals( + HSSFColor.MAROON.class, + tripplets.get(HSSFColor.MAROON.hexString).getClass() + ); + assertEquals( + HSSFColor.YELLOW.class, + tripplets.get(HSSFColor.YELLOW.hexString).getClass() + ); + } +}