mirror of https://github.com/apache/poi.git
Formula values for Excel 4 extractor, for TIKA-1490
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642497 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c00d439f0a
commit
63fd48d501
|
@ -22,7 +22,6 @@ import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
|
||||||
import org.apache.poi.hssf.record.FormulaRecord;
|
|
||||||
import org.apache.poi.hssf.record.NumberRecord;
|
import org.apache.poi.hssf.record.NumberRecord;
|
||||||
import org.apache.poi.hssf.record.OldFormulaRecord;
|
import org.apache.poi.hssf.record.OldFormulaRecord;
|
||||||
import org.apache.poi.hssf.record.OldLabelRecord;
|
import org.apache.poi.hssf.record.OldLabelRecord;
|
||||||
|
@ -42,7 +41,6 @@ import org.apache.poi.ss.usermodel.Cell;
|
||||||
*/
|
*/
|
||||||
public class OldExcelExtractor {
|
public class OldExcelExtractor {
|
||||||
private InputStream input;
|
private InputStream input;
|
||||||
private boolean _includeSheetNames = true;
|
|
||||||
|
|
||||||
public OldExcelExtractor(InputStream input) {
|
public OldExcelExtractor(InputStream input) {
|
||||||
this.input = input;
|
this.input = input;
|
||||||
|
@ -61,13 +59,6 @@ public class OldExcelExtractor {
|
||||||
System.out.println(extractor.getText());
|
System.out.println(extractor.getText());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Should sheet names be included? Default is true
|
|
||||||
*/
|
|
||||||
public void setIncludeSheetNames(boolean includeSheetNames) {
|
|
||||||
_includeSheetNames = includeSheetNames;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the text contents of the file, as best we can
|
* Retrieves the text contents of the file, as best we can
|
||||||
* for these old file formats
|
* for these old file formats
|
||||||
|
@ -95,32 +86,35 @@ public class OldExcelExtractor {
|
||||||
text.append(sr.getString());
|
text.append(sr.getString());
|
||||||
text.append('\n');
|
text.append('\n');
|
||||||
break;
|
break;
|
||||||
// number - 5.71 - TODO Needs format strings
|
|
||||||
case NumberRecord.sid:
|
case NumberRecord.sid:
|
||||||
NumberRecord nr = new NumberRecord(ris);
|
NumberRecord nr = new NumberRecord(ris);
|
||||||
text.append(nr.getValue());
|
handleNumericCell(text, nr.getValue());
|
||||||
text.append('\n');
|
|
||||||
break;
|
break;
|
||||||
case OldFormulaRecord.biff2_sid:
|
case OldFormulaRecord.biff2_sid:
|
||||||
case OldFormulaRecord.biff3_sid:
|
case OldFormulaRecord.biff3_sid:
|
||||||
case OldFormulaRecord.biff4_sid:
|
case OldFormulaRecord.biff4_sid:
|
||||||
OldFormulaRecord fr = new OldFormulaRecord(ris);
|
OldFormulaRecord fr = new OldFormulaRecord(ris);
|
||||||
// if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
|
if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
|
||||||
text.append(fr.getValue());
|
handleNumericCell(text, fr.getValue());
|
||||||
text.append('\n');
|
}
|
||||||
// }
|
|
||||||
break;
|
break;
|
||||||
case RKRecord.sid:
|
case RKRecord.sid:
|
||||||
RKRecord rr = new RKRecord(ris);
|
RKRecord rr = new RKRecord(ris);
|
||||||
text.append(rr.getRKNumber());
|
handleNumericCell(text, rr.getRKNumber());
|
||||||
text.append('\n');
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
ris.readFully(new byte[ris.remaining()]);
|
ris.readFully(new byte[ris.remaining()]);
|
||||||
// text.append(" = " + ris.getSid() + " = \n");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return text.toString();
|
return text.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void handleNumericCell(StringBuffer text, double value) {
|
||||||
|
// TODO Need to fetch / use format strings
|
||||||
|
text.append(value);
|
||||||
|
text.append('\n');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,7 @@ public final class FormulaRecord extends CellRecord {
|
||||||
* Excel encodes the same 8 bytes that would be field_4_value with various NaN
|
* Excel encodes the same 8 bytes that would be field_4_value with various NaN
|
||||||
* values that are decoded/encoded by this class.
|
* values that are decoded/encoded by this class.
|
||||||
*/
|
*/
|
||||||
private static final class SpecialCachedValue {
|
static final class SpecialCachedValue {
|
||||||
/** deliberately chosen by Excel in order to encode other values within Double NaNs */
|
/** deliberately chosen by Excel in order to encode other values within Double NaNs */
|
||||||
private static final long BIT_MARKER = 0xFFFF000000000000L;
|
private static final long BIT_MARKER = 0xFFFF000000000000L;
|
||||||
private static final int VARIABLE_DATA_LENGTH = 6;
|
private static final int VARIABLE_DATA_LENGTH = 6;
|
||||||
|
|
|
@ -17,6 +17,8 @@
|
||||||
|
|
||||||
package org.apache.poi.hssf.record;
|
package org.apache.poi.hssf.record;
|
||||||
|
|
||||||
|
import org.apache.poi.hssf.record.FormulaRecord.SpecialCachedValue;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFCell;
|
||||||
import org.apache.poi.ss.formula.Formula;
|
import org.apache.poi.ss.formula.Formula;
|
||||||
import org.apache.poi.ss.formula.ptg.Ptg;
|
import org.apache.poi.ss.formula.ptg.Ptg;
|
||||||
|
|
||||||
|
@ -30,6 +32,7 @@ public final class OldFormulaRecord extends OldCellRecord {
|
||||||
public final static short biff4_sid = 0x0406;
|
public final static short biff4_sid = 0x0406;
|
||||||
public final static short biff5_sid = 0x0006;
|
public final static short biff5_sid = 0x0006;
|
||||||
|
|
||||||
|
private SpecialCachedValue specialCachedValue;
|
||||||
private double field_4_value;
|
private double field_4_value;
|
||||||
private short field_5_options;
|
private short field_5_options;
|
||||||
private Formula field_6_parsed_expr;
|
private Formula field_6_parsed_expr;
|
||||||
|
@ -37,8 +40,15 @@ public final class OldFormulaRecord extends OldCellRecord {
|
||||||
public OldFormulaRecord(RecordInputStream ris) {
|
public OldFormulaRecord(RecordInputStream ris) {
|
||||||
super(ris, ris.getSid() == biff2_sid);
|
super(ris, ris.getSid() == biff2_sid);
|
||||||
|
|
||||||
// TODO Handle special cached values, for Biff 3+
|
if (isBiff2()) {
|
||||||
field_4_value = ris.readDouble();
|
field_4_value = ris.readDouble();
|
||||||
|
} else {
|
||||||
|
long valueLongBits = ris.readLong();
|
||||||
|
specialCachedValue = SpecialCachedValue.create(valueLongBits);
|
||||||
|
if (specialCachedValue == null) {
|
||||||
|
field_4_value = Double.longBitsToDouble(valueLongBits);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (isBiff2()) {
|
if (isBiff2()) {
|
||||||
field_5_options = (short)ris.readUByte();
|
field_5_options = (short)ris.readUByte();
|
||||||
|
@ -51,6 +61,20 @@ public final class OldFormulaRecord extends OldCellRecord {
|
||||||
field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable);
|
field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getCachedResultType() {
|
||||||
|
if (specialCachedValue == null) {
|
||||||
|
return HSSFCell.CELL_TYPE_NUMERIC;
|
||||||
|
}
|
||||||
|
return specialCachedValue.getValueType();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean getCachedBooleanValue() {
|
||||||
|
return specialCachedValue.getBooleanValue();
|
||||||
|
}
|
||||||
|
public int getCachedErrorValue() {
|
||||||
|
return specialCachedValue.getErrorValue();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get the calculated value of the formula
|
* get the calculated value of the formula
|
||||||
*
|
*
|
||||||
|
|
Loading…
Reference in New Issue