mirror of https://github.com/apache/poi.git
Further Excel 4 text extractor support, for TIKA-1490
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1642492 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ff4b0376c8
commit
e8374f0a9d
|
@ -24,6 +24,7 @@ import java.io.InputStream;
|
|||
|
||||
import org.apache.poi.hssf.record.FormulaRecord;
|
||||
import org.apache.poi.hssf.record.NumberRecord;
|
||||
import org.apache.poi.hssf.record.OldFormulaRecord;
|
||||
import org.apache.poi.hssf.record.OldLabelRecord;
|
||||
import org.apache.poi.hssf.record.OldStringRecord;
|
||||
import org.apache.poi.hssf.record.RKRecord;
|
||||
|
@ -100,15 +101,15 @@ public class OldExcelExtractor {
|
|||
text.append(nr.getValue());
|
||||
text.append('\n');
|
||||
break;
|
||||
/*
|
||||
case OldFormulaRecord.sid:
|
||||
FormulaRecord fr = new FormulaRecord(ris);
|
||||
System.out.println(fr.getCachedResultType());
|
||||
if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
|
||||
case OldFormulaRecord.biff2_sid:
|
||||
case OldFormulaRecord.biff3_sid:
|
||||
case OldFormulaRecord.biff4_sid:
|
||||
OldFormulaRecord fr = new OldFormulaRecord(ris);
|
||||
// if (fr.getCachedResultType() == Cell.CELL_TYPE_NUMERIC) {
|
||||
text.append(fr.getValue());
|
||||
text.append('\n');
|
||||
}
|
||||
*/
|
||||
// }
|
||||
break;
|
||||
case RKRecord.sid:
|
||||
RKRecord rr = new RKRecord(ris);
|
||||
text.append(rr.getRKNumber());
|
||||
|
|
|
@ -36,7 +36,6 @@ import org.apache.poi.util.LittleEndianOutput;
|
|||
public final class FormulaRecord extends CellRecord {
|
||||
|
||||
public static final short sid = 0x0006; // docs say 406...because of a bug Microsoft support site article #Q184647)
|
||||
public static final short olderSid = 0x0406; // older biff versions do manage 406!
|
||||
private static int FIXED_SIZE = 14; // double + short + int
|
||||
|
||||
private static final BitField alwaysCalc = BitFieldFactory.getInstance(0x0001);
|
||||
|
|
|
@ -0,0 +1,118 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hssf.record;
|
||||
|
||||
import org.apache.poi.ss.formula.Formula;
|
||||
import org.apache.poi.ss.formula.ptg.Ptg;
|
||||
|
||||
/**
|
||||
* Formula Record (0x0006 / 0x0206 / 0x0406) - holds a formula in
|
||||
* encoded form, along with the value if a number
|
||||
*/
|
||||
public final class OldFormulaRecord {
|
||||
public final static short biff2_sid = 0x0006;
|
||||
public final static short biff3_sid = 0x0206;
|
||||
public final static short biff4_sid = 0x0406;
|
||||
public final static short biff5_sid = 0x0006;
|
||||
|
||||
private short sid;
|
||||
private int field_1_row;
|
||||
private short field_2_column;
|
||||
private int field_3_cell_attrs; // Biff 2
|
||||
private short field_3_xf_index; // Biff 3+
|
||||
private double field_4_value;
|
||||
private short field_5_options;
|
||||
private Formula field_6_parsed_expr;
|
||||
|
||||
public OldFormulaRecord(RecordInputStream ris) {
|
||||
field_1_row = ris.readUShort();
|
||||
field_2_column = ris.readShort();
|
||||
|
||||
if (ris.getSid() == biff2_sid) {
|
||||
field_3_cell_attrs = ris.readUShort() << 8;
|
||||
field_3_cell_attrs += ris.readUByte();
|
||||
} else {
|
||||
field_3_xf_index = ris.readShort();
|
||||
}
|
||||
|
||||
// TODO Handle special cached values, for Biff 3+
|
||||
field_4_value = ris.readDouble();
|
||||
|
||||
if (ris.getSid() == biff2_sid) {
|
||||
field_5_options = (short)ris.readUByte();
|
||||
} else {
|
||||
field_5_options = ris.readShort();
|
||||
}
|
||||
|
||||
int expression_len = ris.readShort();
|
||||
int nBytesAvailable = ris.available();
|
||||
field_6_parsed_expr = Formula.read(expression_len, ris, nBytesAvailable);
|
||||
}
|
||||
|
||||
public int getRow()
|
||||
{
|
||||
return field_1_row;
|
||||
}
|
||||
|
||||
public short getColumn()
|
||||
{
|
||||
return field_2_column;
|
||||
}
|
||||
|
||||
public short getXFIndex()
|
||||
{
|
||||
return field_3_xf_index;
|
||||
}
|
||||
public int getCellAttrs()
|
||||
{
|
||||
return field_3_cell_attrs;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the calculated value of the formula
|
||||
*
|
||||
* @return calculated value
|
||||
*/
|
||||
public double getValue() {
|
||||
return field_4_value;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the option flags
|
||||
*
|
||||
* @return bitmask
|
||||
*/
|
||||
public short getOptions() {
|
||||
return field_5_options;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the formula tokens. never <code>null</code>
|
||||
*/
|
||||
public Ptg[] getParsedExpression() {
|
||||
return field_6_parsed_expr.getTokens();
|
||||
}
|
||||
|
||||
public Formula getFormula() {
|
||||
return field_6_parsed_expr;
|
||||
}
|
||||
|
||||
public short getSid() {
|
||||
return sid;
|
||||
}
|
||||
}
|
|
@ -65,7 +65,7 @@ public final class TestOldExcelExtractor extends TestCase {
|
|||
assertTrue(text, text.contains("$100,000 or more"));
|
||||
assertTrue(text, text.contains("S corporation returns, Form 1120S [10,15]"));
|
||||
// TODO Get these quotes working correctly
|
||||
// assertTrue(text, text.contains("individual income tax return “short forms.”"));
|
||||
// assertTrue(text, text.contains("individual income tax return \u201Cshort forms.\u201D"));
|
||||
|
||||
// Formula based strings
|
||||
// TODO Find some then test
|
||||
|
|
Loading…
Reference in New Issue