Fixed problem with linking shared formulas when ranges overlap

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@712307 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Josh Micich 2008-11-07 23:16:48 +00:00
parent f143809549
commit fb53e42579
12 changed files with 340 additions and 66 deletions

View File

@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.5-beta4" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">Fixed problem with linking shared formulas when ranges overlap</action>
<action dev="POI-DEVELOPERS" type="fix">45784 - More fixes to SeriesTextRecord</action>
<action dev="POI-DEVELOPERS" type="fix">46033 - fixed TableCell to correctly set text type</action>
<action dev="POI-DEVELOPERS" type="fix">46122 - fixed Picture.draw to skip rendering if picture data was not found</action>

View File

@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.5-beta4" date="2008-??-??">
<action dev="POI-DEVELOPERS" type="fix">Fixed problem with linking shared formulas when ranges overlap</action>
<action dev="POI-DEVELOPERS" type="fix">45784 - More fixes to SeriesTextRecord</action>
<action dev="POI-DEVELOPERS" type="fix">46033 - fixed TableCell to correctly set text type</action>
<action dev="POI-DEVELOPERS" type="fix">46122 - fixed Picture.draw to skip rendering if picture data was not found</action>

View File

@ -345,6 +345,10 @@ public final class FormulaRecord extends Record implements CellValueRecordInterf
return field_8_parsed_expr.getTokens();
}
public Formula getFormula() {
return field_8_parsed_expr;
}
public void setParsedExpression(Ptg[] ptgs) {
field_8_parsed_expr = Formula.create(ptgs);
}

View File

@ -182,4 +182,7 @@ public final class SharedFormulaRecord extends SharedValueRecordBase {
result.field_7_parsed_expr = field_7_parsed_expr.copy();
return result;
}
public boolean isFormulaSame(SharedFormulaRecord other) {
return field_7_parsed_expr.isSame(other.field_7_parsed_expr);
}
}

View File

@ -25,6 +25,8 @@ import org.apache.poi.hssf.record.SharedFormulaRecord;
import org.apache.poi.hssf.record.StringRecord;
import org.apache.poi.hssf.record.formula.ExpPtg;
import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.ss.formula.Formula;
/**
* The formula record aggregate is used to join together the formula record and it's
@ -57,15 +59,19 @@ public final class FormulaRecordAggregate extends RecordAggregate implements Cel
+ (hasCachedStringFlag ? "" : "not ") + " set");
}
if (formulaRec.isSharedFormula()) {
CellReference cr = new CellReference(formulaRec.getRow(), formulaRec.getColumn());
CellReference firstCell = formulaRec.getFormula().getExpReference();
if (firstCell == null) {
handleMissingSharedFormulaRecord(formulaRec);
} else {
_sharedFormulaRecord = svm.linkSharedFormulaRecord(firstCell, this);
}
}
_formulaRecord = formulaRec;
_sharedValueManager = svm;
_stringRecord = stringRec;
if (formulaRec.isSharedFormula()) {
_sharedFormulaRecord = svm.linkSharedFormulaRecord(this);
if (_sharedFormulaRecord == null) {
handleMissingSharedFormulaRecord(formulaRec);
}
}
}
/**
* Sometimes the shared formula flag "seems" to be erroneously set (because the corresponding
@ -132,11 +138,15 @@ public final class FormulaRecordAggregate extends RecordAggregate implements Cel
public void visitContainedRecords(RecordVisitor rv) {
rv.visitRecord(_formulaRecord);
// TODO - only bother with this if array or table formula
Record sharedFormulaRecord = _sharedValueManager.getRecordForFirstCell(_formulaRecord);
CellReference sharedFirstCell = _formulaRecord.getFormula().getExpReference();
// perhaps this could be optimised by consulting the (somewhat unreliable) isShared flag
// and/or distinguishing between tExp and tTbl.
if (sharedFirstCell != null) {
Record sharedFormulaRecord = _sharedValueManager.getRecordForFirstCell(sharedFirstCell, this);
if (sharedFormulaRecord != null) {
rv.visitRecord(sharedFormulaRecord);
}
}
if (_stringRecord != null) {
rv.visitRecord(_stringRecord);
}

View File

@ -17,6 +17,8 @@
package org.apache.poi.hssf.record.aggregates;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
@ -25,6 +27,10 @@ import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.SharedFormulaRecord;
import org.apache.poi.hssf.record.SharedValueRecordBase;
import org.apache.poi.hssf.record.TableRecord;
import org.apache.poi.hssf.record.formula.ExpPtg;
import org.apache.poi.hssf.record.formula.TblPtg;
import org.apache.poi.hssf.util.CellRangeAddress8Bit;
import org.apache.poi.hssf.util.CellReference;
/**
* Manages various auxiliary records while constructing a
@ -42,7 +48,7 @@ public final class SharedValueManager {
// This class should probably be generalised to handle array and table groups too
private static final class SharedValueGroup {
private final SharedValueRecordBase _svr;
private final FormulaRecordAggregate[] _frAggs;
private FormulaRecordAggregate[] _frAggs;
private int _numberOfFormulas;
public SharedValueGroup(SharedValueRecordBase svr) {
@ -54,6 +60,12 @@ public final class SharedValueManager {
}
public void add(FormulaRecordAggregate agg) {
if (_numberOfFormulas >= _frAggs.length) {
// this probably shouldn't occur - problems with sample file "15228.xls"
FormulaRecordAggregate[] temp = new FormulaRecordAggregate[_numberOfFormulas*2];
System.arraycopy(_frAggs, 0, temp, 0, _frAggs.length);
_frAggs = temp;
}
_frAggs[_numberOfFormulas++] = agg;
}
@ -63,10 +75,6 @@ public final class SharedValueManager {
}
}
public boolean isInRange(int rowIx, int columnIx) {
return _svr.isInRange(rowIx, columnIx);
}
public SharedValueRecordBase getSVR() {
return _svr;
}
@ -77,13 +85,16 @@ public final class SharedValueManager {
* that is not the top-left corner of the range.
* @return <code>true</code> if this is the first formula cell in the group
*/
public boolean isFirstCell(int row, int column) {
// hack for the moment, just check against the first formula that
// came in through the add() method.
FormulaRecordAggregate fra = _frAggs[0];
return fra.getRow() == row && fra.getColumn() == column;
public boolean isFirstMember(FormulaRecordAggregate agg) {
return agg == _frAggs[0];
}
public final String toString() {
StringBuffer sb = new StringBuffer(64);
sb.append(getClass().getName()).append(" [");
sb.append(_svr.getRange().toString());
sb.append("]");
return sb.toString();
}
}
public static final SharedValueManager EMPTY = new SharedValueManager(
@ -106,6 +117,13 @@ public final class SharedValueManager {
_groupsBySharedFormulaRecord = m;
}
/**
* @param recs list of sheet records (possibly contains records for other parts of the Excel file)
* @param startIx index of first row/cell record for current sheet
* @param endIx one past index of last row/cell record for current sheet. It is important
* that this code does not inadvertently collect <tt>SharedFormulaRecord</tt>s from any other
* sheet (which could happen if endIx is chosen poorly). (see bug 44449)
*/
public static SharedValueManager create(SharedFormulaRecord[] sharedFormulaRecords,
ArrayRecord[] arrayRecords, TableRecord[] tableRecords) {
if (sharedFormulaRecords.length + arrayRecords.length + tableRecords.length < 1) {
@ -116,53 +134,113 @@ public final class SharedValueManager {
/**
* @return <code>null</code> if the specified formula does not have any corresponding
* {@link SharedFormulaRecord}
* @param firstCell as extracted from the {@link ExpPtg} from the cell's formula.
* @return never <code>null</code>
*/
public SharedFormulaRecord linkSharedFormulaRecord(FormulaRecordAggregate agg) {
FormulaRecord formula = agg.getFormulaRecord();
int row = formula.getRow();
int column = formula.getColumn();
// Traverse the list of shared formulas in
// reverse order, and try to find the correct one
// for us
public SharedFormulaRecord linkSharedFormulaRecord(CellReference firstCell, FormulaRecordAggregate agg) {
SharedValueGroup[] groups = getGroups();
SharedValueGroup result = findGroup(getGroups(), firstCell);
result.add(agg);
return (SharedFormulaRecord) result.getSVR();
}
private static SharedValueGroup findGroup(SharedValueGroup[] groups, CellReference firstCell) {
int row = firstCell.getRow();
int column = firstCell.getCol();
// Traverse the list of shared formulas and try to find the correct one for us
// perhaps this could be optimised to some kind of binary search
for (int i = 0; i < groups.length; i++) {
SharedValueGroup svr = groups[i];
if (svr.isInRange(row, column)) {
svr.add(agg);
return (SharedFormulaRecord) svr.getSVR();
SharedValueGroup svg = groups[i];
if (svg.getSVR().isFirstCell(row, column)) {
return svg;
}
}
return null;
// else - no SharedFormulaRecord was found with the specified firstCell.
// This is unusual, but one sample file exhibits the anomaly: "ex45046-21984.xls"
// Excel seems to handle the problem OK, and doesn't even correct it. Perhaps POI should.
// search for shared formula by range
SharedValueGroup result = null;
for (int i = 0; i < groups.length; i++) {
SharedValueGroup svg = groups[i];
if (svg.getSVR().isInRange(row, column)) {
if (result != null) {
// This happens in sample file "15228.xls"
if (sharedFormulasAreSame(result, svg)) {
// hopefully this is OK - just use the first one since they are the same
// not quite
// TODO - fix file "15228.xls" so it opens in Excel after rewriting with POI
} else {
throw new RuntimeException("This cell is in the range of more than one distinct shared formula");
}
} else {
result = svg;
}
}
}
if (result == null) {
throw new RuntimeException("Failed to find a matching shared formula record");
}
return result;
}
/**
* Handles the ugly situation (seen in example "15228.xls") where a shared formula cell is
* covered by more than one shared formula range, but the formula cell's {@link ExpPtg}
* doesn't identify any of them.
* @return <code>true</code> if the underlying shared formulas are the same
*/
private static boolean sharedFormulasAreSame(SharedValueGroup grpA, SharedValueGroup grpB) {
// safe to cast here because this findGroup() is never called for ARRAY or TABLE formulas
SharedFormulaRecord sfrA = (SharedFormulaRecord) grpA.getSVR();
SharedFormulaRecord sfrB = (SharedFormulaRecord) grpB.getSVR();
return sfrA.isFormulaSame(sfrB);
}
private SharedValueGroup[] getGroups() {
if (_groups == null) {
SharedValueGroup[] groups = new SharedValueGroup[_groupsBySharedFormulaRecord.size()];
_groupsBySharedFormulaRecord.values().toArray(groups);
Arrays.sort(groups, SVGComparator); // make search behaviour more deterministic
_groups = groups;
}
return _groups;
}
private static final Comparator SVGComparator = new Comparator() {
public int compare(Object a, Object b) {
CellRangeAddress8Bit rangeA = ((SharedValueGroup)a).getSVR().getRange();
CellRangeAddress8Bit rangeB = ((SharedValueGroup)b).getSVR().getRange();
int cmp;
cmp = rangeA.getFirstRow() - rangeB.getFirstRow();
if (cmp != 0) {
return cmp;
}
cmp = rangeA.getFirstColumn() - rangeB.getFirstColumn();
if (cmp != 0) {
return cmp;
}
return 0;
}
};
/**
* Note - does not return SharedFormulaRecords currently, because the corresponding formula
* records have been converted to 'unshared'. POI does not attempt to re-share formulas. On
* the other hand, there is no such conversion for array or table formulas, so this method
* returns the TABLE or ARRAY record (if it should be written after the specified
* formulaRecord.
* The {@link SharedValueRecordBase} record returned by this method
* @param firstCell the cell coordinates as read from the {@link ExpPtg} or {@link TblPtg}
* of the current formula. Note - this is usually not the same as the cell coordinates
* of the formula's cell.
*
* @return the TABLE or ARRAY record for this formula cell, if it is the first cell of a
* table or array region.
* @return the SHRFMLA, TABLE or ARRAY record for this formula cell, if it is the first cell of a
* table or array region. <code>null</code> if
*/
public SharedValueRecordBase getRecordForFirstCell(FormulaRecord formulaRecord) {
int row = formulaRecord.getRow();
int column = formulaRecord.getColumn();
public SharedValueRecordBase getRecordForFirstCell(CellReference firstCell, FormulaRecordAggregate agg) {
int row = firstCell.getRow();
int column = firstCell.getCol();
boolean isTopLeft = agg.getRow() == row && agg.getColumn() == column;
if (isTopLeft) {
for (int i = 0; i < _tableRecords.length; i++) {
TableRecord tr = _tableRecords[i];
if (tr.isFirstCell(row, column)) {
@ -175,11 +253,19 @@ public final class SharedValueManager {
return ar;
}
}
} else {
// Since arrays and tables cannot be sparse (all cells in range participate)
// no need to search arrays and tables if agg is not the top left cell
}
SharedValueGroup[] groups = getGroups();
for (int i = 0; i < groups.length; i++) {
SharedValueGroup svg = groups[i];
if (svg.isFirstCell(row, column)) {
return svg.getSVR();
SharedValueRecordBase svr = svg.getSVR();
if (svr.isFirstCell(row, column)) {
if (svg.isFirstMember(agg)) {
return svr;
}
return null;
}
}
return null;

View File

@ -1,6 +1,15 @@
package org.apache.poi.ss.formula;
import java.util.Arrays;
import org.apache.poi.hssf.record.ArrayRecord;
import org.apache.poi.hssf.record.SharedFormulaRecord;
import org.apache.poi.hssf.record.TableRecord;
import org.apache.poi.hssf.record.formula.ExpPtg;
import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.hssf.record.formula.TblPtg;
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianByteArrayInputStream;
import org.apache.poi.util.LittleEndianInput;
import org.apache.poi.util.LittleEndianOutput;
@ -130,4 +139,35 @@ public class Formula {
// OK to return this for the moment because currently immutable
return this;
}
/**
* Gets the locator for the corresponding {@link SharedFormulaRecord}, {@link ArrayRecord} or
* {@link TableRecord} if this formula belongs to such a grouping. The {@link CellReference}
* returned by this method will match the top left corner of the range of that grouping.
* The return value is usually not the same as the location of the cell containing this formula.
*
* @return the firstRow & firstColumn of an array formula or shared formula that this formula
* belongs to. <code>null</code> if this formula is not part of an array or shared formula.
*/
public CellReference getExpReference() {
byte[] data = _byteEncoding;
if (data.length != 5) {
// tExp and tTbl are always 5 bytes long, and the only ptg in the formula
return null;
}
switch (data[0]) {
case ExpPtg.sid:
break;
case TblPtg.sid:
break;
default:
return null;
}
int firstRow = LittleEndian.getUShort(data, 1);
int firstColumn = LittleEndian.getUShort(data, 3);
return new CellReference(firstRow, firstColumn);
}
public boolean isSame(Formula other) {
return Arrays.equals(_byteEncoding, other._byteEncoding);
}
}

View File

@ -46,8 +46,7 @@ public abstract class CellRangeAddressBase {
_firstCol = firstCol;
_lastCol = lastCol;
}
private static boolean isValid(int firstRow, int lastRow, int firstColumn, int lastColumn)
{
private static boolean isValid(int firstRow, int lastRow, int firstColumn, int lastColumn) {
if(lastRow < 0 || lastRow > LAST_ROW_INDEX) {
return false;
}
@ -129,6 +128,8 @@ public abstract class CellRangeAddressBase {
}
public final String toString() {
return getClass().getName() + " ["+_firstRow+", "+_lastRow+", "+_firstCol+", "+_lastCol+"]";
CellReference crA = new CellReference(_firstRow, _firstCol);
CellReference crB = new CellReference(_lastRow, _lastCol);
return getClass().getName() + " [" + crA.formatAsString() + ":" + crB.formatAsString() +"]";
}
}

View File

@ -34,6 +34,7 @@ public final class AllRecordAggregateTests {
result.addTestSuite(TestColumnInfoRecordsAggregate.class);
result.addTestSuite(TestFormulaRecordAggregate.class);
result.addTestSuite(TestRowRecordsAggregate.class);
result.addTestSuite(TestSharedValueManager.class);
result.addTestSuite(TestValueRecordsAggregate.class);
return result;
}

View File

@ -0,0 +1,126 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf.record.aggregates;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import java.util.HashMap;
import junit.framework.AssertionFailedError;
import junit.framework.TestCase;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SharedFormulaRecord;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.usermodel.RecordInspector;
/**
* Tests for {@link SharedValueManager}
*
* @author Josh Micich
*/
public final class TestSharedValueManager extends TestCase {
/**
* This Excel workbook contains two sheets that each have a pair of overlapping shared formula
* ranges. The first sheet has one row and one column shared formula ranges which intersect.
* The second sheet has two column shared formula ranges - one contained within the other.
* These shared formula ranges were created by fill-dragging a single cell formula across the
* desired region. The larger shared formula ranges were placed first.<br/>
*
* There are probably many ways to produce similar effects, but it should be noted that Excel
* is quite temperamental in this regard. Slight variations in technique can cause the shared
* formulas to spill out into plain formula records (which would make these tests pointless).
*
*/
private static final String SAMPLE_FILE_NAME = "overlapSharedFormula.xls";
/**
* Some of these bugs are intermittent, and the test author couldn't think of a way to write
* test code to hit them bug deterministically. The reason for the unpredictability is that
* the bugs depended on the {@link SharedFormulaRecord}s being searched in a particular order.
* At the time of writing of the test, the order was being determined by the call to {@link
* Collection#toArray(Object[])} on {@link HashMap#values()} where the items in the map were
* using default {@link Object#hashCode()}<br/>
*/
private static final int MAX_ATTEMPTS=5;
/**
* This bug happened when there were two or more shared formula ranges that overlapped. POI
* would sometimes associate formulas in the overlapping region with the wrong shared formula
*/
public void testPartiallyOverlappingRanges() throws IOException {
Record[] records;
int attempt=1;
do {
HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook(SAMPLE_FILE_NAME);
HSSFSheet sheet = wb.getSheetAt(0);
RecordInspector.getRecords(sheet, 0);
assertEquals("1+1", sheet.getRow(2).getCell(0).getCellFormula());
if ("1+1".equals(sheet.getRow(3).getCell(0).getCellFormula())) {
throw new AssertionFailedError("Identified bug - wrong shared formula record chosen"
+ " (attempt " + attempt + ")");
}
assertEquals("2+2", sheet.getRow(3).getCell(0).getCellFormula());
records = RecordInspector.getRecords(sheet, 0);
} while (attempt++ < MAX_ATTEMPTS);
int count=0;
for (int i = 0; i < records.length; i++) {
if (records[i] instanceof SharedFormulaRecord) {
count++;
}
}
assertEquals(2, count);
}
/**
* This bug occurs for similar reasons to the bug in {@link #testPartiallyOverlappingRanges()}
* but the symptoms are much uglier - serialization fails with {@link NullPointerException}.<br/>
*/
public void testCompletelyOverlappedRanges() throws IOException {
Record[] records;
int attempt=1;
do {
HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook(SAMPLE_FILE_NAME);
HSSFSheet sheet = wb.getSheetAt(1);
try {
records = RecordInspector.getRecords(sheet, 0);
} catch (NullPointerException e) {
throw new AssertionFailedError("Identified bug " +
"- cannot reserialize completely overlapped shared formula"
+ " (attempt " + attempt + ")");
}
} while (attempt++ < MAX_ATTEMPTS);
int count=0;
for (int i = 0; i < records.length; i++) {
if (records[i] instanceof SharedFormulaRecord) {
count++;
}
}
assertEquals(2, count);
}
}

View File

@ -60,6 +60,7 @@ public final class TestBugs extends TestCase {
if (true) { // set to false to output test files
return;
}
System.setProperty("poi.keep.tmp.files", "true");
File file;
try {
file = TempFile.createTempFile(simpleFileName + "#", ".xls");