Formula values for Excel 4 extractor, for TIKA-1490

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642497 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2014-11-30 01:30:43 +00:00
parent c00d439f0a
commit 63fd48d501
3 changed files with 40 additions and 22 deletions

View File

@ -22,7 +22,6 @@ import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import org.apache.poi.hssf.record.FormulaRecord;
import org.apache.poi.hssf.record.NumberRecord; import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.OldFormulaRecord; import org.apache.poi.hssf.record.OldFormulaRecord;
import org.apache.poi.hssf.record.OldLabelRecord; import org.apache.poi.hssf.record.OldLabelRecord;
@ -42,7 +41,6 @@ import org.apache.poi.ss.usermodel.Cell;
*/ */
public class OldExcelExtractor { public class OldExcelExtractor {
private InputStream input; private InputStream input;
private boolean _includeSheetNames = true;
public OldExcelExtractor(InputStream input) { public OldExcelExtractor(InputStream input) {
this.input = input; this.input = input;
@ -61,13 +59,6 @@ public class OldExcelExtractor {
System.out.println(extractor.getText()); System.out.println(extractor.getText());
} }
/**
* Should sheet names be included? Default is true
*/
public void setIncludeSheetNames(boolean includeSheetNames) {
_includeSheetNames = includeSheetNames;
}
/** /**
* Retrieves the text contents of the file, as best we can * Retrieves the text contents of the file, as best we can
* for these old file formats * for these old file formats
@ -95,32 +86,35 @@ public class OldExcelExtractor {
text.append(sr.getString()); text.append(sr.getString());
text.append('\n'); text.append('\n');
break; break;
// number - 5.71 - TODO Needs format strings
case NumberRecord.sid: case NumberRecord.sid:
NumberRecord nr = new NumberRecord(ris); NumberRecord nr = new NumberRecord(ris);
text.append(nr.getValue()); handleNumericCell(text, nr.getValue());
text.append('\n');
break; break;
case OldFormulaRecord.biff2_sid: case OldFormulaRecord.biff2_sid:
case OldFormulaRecord.biff3_sid: case OldFormulaRecord.biff3_sid:
case OldFormulaRecord.biff4_sid: case OldFormulaRecord.biff4_sid:
OldFormulaRecord fr = new OldFormulaRecord(ris); OldFormulaRecord fr = new OldFormulaRecord(ris);
// if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) { if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
text.append(fr.getValue()); handleNumericCell(text, fr.getValue());
text.append('\n'); }
// }
break; break;
case RKRecord.sid: case RKRecord.sid:
RKRecord rr = new RKRecord(ris); RKRecord rr = new RKRecord(ris);
text.append(rr.getRKNumber()); handleNumericCell(text, rr.getRKNumber());
text.append('\n');
break; break;
default: default:
ris.readFully(new byte[ris.remaining()]); ris.readFully(new byte[ris.remaining()]);
// text.append(" = " + ris.getSid() + " = \n");
} }
} }
return text.toString(); return text.toString();
} }
/**
 * Appends one numeric cell value to the text being extracted, followed
 * by a newline.
 *
 * @param text buffer collecting the extracted text
 * @param value the numeric value of the cell
 */
protected void handleNumericCell(StringBuffer text, double value) {
    // TODO Need to fetch / use format strings; for now the raw double is written
    text.append(value).append('\n');
}
} }

View File

@ -47,7 +47,7 @@ public final class FormulaRecord extends CellRecord {
* Excel encodes the same 8 bytes that would be field_4_value with various NaN * Excel encodes the same 8 bytes that would be field_4_value with various NaN
* values that are decoded/encoded by this class. * values that are decoded/encoded by this class.
*/ */
private static final class SpecialCachedValue { static final class SpecialCachedValue {
/** deliberately chosen by Excel in order to encode other values within Double NaNs */ /** deliberately chosen by Excel in order to encode other values within Double NaNs */
private static final long BIT_MARKER = 0xFFFF000000000000L; private static final long BIT_MARKER = 0xFFFF000000000000L;
private static final int VARIABLE_DATA_LENGTH = 6; private static final int VARIABLE_DATA_LENGTH = 6;

View File

@ -17,6 +17,8 @@
package org.apache.poi.hssf.record; package org.apache.poi.hssf.record;
import org.apache.poi.hssf.record.FormulaRecord.SpecialCachedValue;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.ss.formula.Formula; import org.apache.poi.ss.formula.Formula;
import org.apache.poi.ss.formula.ptg.Ptg; import org.apache.poi.ss.formula.ptg.Ptg;
@ -30,6 +32,7 @@ public final class OldFormulaRecord extends OldCellRecord {
public final static short biff4_sid = 0x0406; public final static short biff4_sid = 0x0406;
public final static short biff5_sid = 0x0006; public final static short biff5_sid = 0x0006;
private SpecialCachedValue specialCachedValue;
private double field_4_value; private double field_4_value;
private short field_5_options; private short field_5_options;
private Formula field_6_parsed_expr; private Formula field_6_parsed_expr;
@ -37,8 +40,15 @@ public final class OldFormulaRecord extends OldCellRecord {
public OldFormulaRecord(RecordInputStream ris) { public OldFormulaRecord(RecordInputStream ris) {
super(ris, ris.getSid() == biff2_sid); super(ris, ris.getSid() == biff2_sid);
// TODO Handle special cached values, for Biff 3+ if (isBiff2()) {
field_4_value = ris.readDouble(); field_4_value = ris.readDouble();
} else {
long valueLongBits = ris.readLong();
specialCachedValue = SpecialCachedValue.create(valueLongBits);
if (specialCachedValue == null) {
field_4_value = Double.longBitsToDouble(valueLongBits);
}
}
if (isBiff2()) { if (isBiff2()) {
field_5_options = (short)ris.readUByte(); field_5_options = (short)ris.readUByte();
@ -51,6 +61,20 @@ public final class OldFormulaRecord extends OldCellRecord {
field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable); field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable);
} }
/**
 * Reports the type of the cached formula result.
 *
 * @return {@link HSSFCell#CELL_TYPE_NUMERIC} when no special cached value
 *         was decoded for this record, otherwise the value type reported
 *         by the special cached value
 */
public int getCachedResultType() {
    return (specialCachedValue == null)
            ? HSSFCell.CELL_TYPE_NUMERIC
            : specialCachedValue.getValueType();
}
/**
 * Returns the cached boolean result of the formula.
 * <p>
 * Only meaningful when a special cached value was decoded for this record;
 * check {@link #getCachedResultType()} first.
 *
 * @return the boolean held by the special cached value
 * @throws IllegalStateException if this record has no special cached value,
 *         i.e. its cached result is a plain number
 */
public boolean getCachedBooleanValue() {
    if (specialCachedValue == null) {
        // No special value was decoded, so the result is numeric; fail with a
        // clear message instead of a bare NullPointerException.
        throw new IllegalStateException("Cached formula result is numeric, not boolean");
    }
    // Handling of non-boolean special values is left to SpecialCachedValue.
    return specialCachedValue.getBooleanValue();
}
/**
 * Returns the cached error code result of the formula.
 * <p>
 * Only meaningful when a special cached value was decoded for this record;
 * check {@link #getCachedResultType()} first.
 *
 * @return the error value held by the special cached value
 * @throws IllegalStateException if this record has no special cached value,
 *         i.e. its cached result is a plain number
 */
public int getCachedErrorValue() {
    if (specialCachedValue == null) {
        // No special value was decoded, so the result is numeric; fail with a
        // clear message instead of a bare NullPointerException.
        throw new IllegalStateException("Cached formula result is numeric, not an error");
    }
    // Handling of non-error special values is left to SpecialCachedValue.
    return specialCachedValue.getErrorValue();
}
/** /**
* get the calculated value of the formula * get the calculated value of the formula
* *