Bugzilla 52569: Support DConRefRecord in HSSF

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1241380 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2012-02-07 09:00:47 +00:00
parent 465974f540
commit dd9c547ea3
6 changed files with 627 additions and 0 deletions

View File

@ -34,6 +34,7 @@
<changes> <changes>
<release version="3.8-beta6" date="2012-??-??"> <release version="3.8-beta6" date="2012-??-??">
<action dev="poi-developers" type="add">52569 - Support DConRefRecord in HSSF</action>
<action dev="poi-developers" type="add">52575 - added an option to ignore missing workbook references in formula evaluator</action> <action dev="poi-developers" type="add">52575 - added an option to ignore missing workbook references in formula evaluator</action>
<action dev="poi-developers" type="add">Validate address of hyperlinks in XSSF</action> <action dev="poi-developers" type="add">Validate address of hyperlinks in XSSF</action>
<action dev="poi-developers" type="fix">52540 - Relax the M4.1 constraint on reading OOXML files, as some Office produced ones do have 2 Core Properties, despite the specification explicitly forbidding this</action> <action dev="poi-developers" type="fix">52540 - Relax the M4.1 constraint on reading OOXML files, as some Office produced ones do have 2 Core Properties, despite the specification explicitly forbidding this</action>

View File

@ -148,6 +148,7 @@ public final class BiffViewer {
case DatRecord.sid: return new DatRecord(in); case DatRecord.sid: return new DatRecord(in);
case DataFormatRecord.sid: return new DataFormatRecord(in); case DataFormatRecord.sid: return new DataFormatRecord(in);
case DateWindow1904Record.sid: return new DateWindow1904Record(in); case DateWindow1904Record.sid: return new DateWindow1904Record(in);
case DConRefRecord.sid: return new DConRefRecord(in);
case DefaultColWidthRecord.sid:return new DefaultColWidthRecord(in); case DefaultColWidthRecord.sid:return new DefaultColWidthRecord(in);
case DefaultDataLabelTextPropertiesRecord.sid: return new DefaultDataLabelTextPropertiesRecord(in); case DefaultDataLabelTextPropertiesRecord.sid: return new DefaultDataLabelTextPropertiesRecord(in);
case DefaultRowHeightRecord.sid: return new DefaultRowHeightRecord(in); case DefaultRowHeightRecord.sid: return new DefaultRowHeightRecord(in);

View File

@ -0,0 +1,313 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.poi.hssf.record;
import java.util.Arrays;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianOutput;
/**
 * DConRef records specify a range in a workbook (internal or external) that serves as a data source
 * for pivot tables or data consolidation.
 *
 * Represents a <code>DConRef</code> Structure
 * <a href="http://msdn.microsoft.com/en-us/library/dd923854(office.12).aspx">[MS-XLS s.
 * 2.4.86]</a>, and the contained <code>DConFile</code> structure
 * <a href="http://msdn.microsoft.com/en-us/library/dd950157(office.12).aspx">
 * [MS-XLS s. 2.5.69]</a>. This in turn contains a <code>XLUnicodeStringNoCch</code>
 * <a href="http://msdn.microsoft.com/en-us/library/dd910585(office.12).aspx">
 * [MS-XLS s. 2.5.296]</a>.
 *
 * <pre>
 *         _______________________________
 *        |            DConRef            |
 *(bytes) +-+-+-+-+-+-+-+-+-+-+...+-+-+-+-+
 *        |    ref    |cch|  stFile   | un|
 *        +-+-+-+-+-+-+-+-+-+-+...+-+-+-+-+
 *                              |
 *                     _________|_____________________
 *                    |DConFile / XLUnicodeStringNoCch|
 *                    +-+-+-+-+-+-+-+-+-+-+-+...+-+-+-+
 *             (bits) |h|   reserved  |      rgb      |
 *                    +-+-+-+-+-+-+-+-+-+-+-+...+-+-+-+
 * </pre>
 * Where
 * <ul>
 * <li><code>DConFile.h = 0x00</code> if the characters in <code>rgb</code> are single byte, and
 * <code>DConFile.h = 0x01</code> if they are double byte. <br/>
 * If they are double byte, then<br/>
 * <ul>
 * <li> If it exists, the length of <code>DConRef.un = 2</code>. Otherwise it is 1.
 * <li> The length of <code>DConFile.rgb = (2 * DConRef.cch)</code>. Otherwise it is equal to
 * <code>DConRef.cch</code>.
 * </ul>
 * <li><code>DConFile.rgb</code> starts with <code>0x01</code> if it is an external reference,
 * and with <code>0x02</code> if it is a self-reference.
 * </ul>
 *
 * At the moment this class is read-only.
 *
 * @author Niklas Rehfeld
 */
public class DConRefRecord extends StandardRecord
{
    /**
     * The id of the record type,
     * <code>sid = {@value}</code>
     */
    public static final short sid = 0x0051;

    /**
     * Charset for <code>rgb</code> when <code>DConFile.h = 0</code>: each byte is the low byte of
     * a UTF-16 code unit whose high byte is zero, which is exactly ISO-8859-1.
     */
    private static final java.nio.charset.Charset SINGLE_BYTE_CHARSET =
            java.nio.charset.Charset.forName("ISO-8859-1");

    /**
     * Charset for <code>rgb</code> when <code>DConFile.h = 1</code>: little-endian UTF-16.
     */
    private static final java.nio.charset.Charset DOUBLE_BYTE_CHARSET =
            java.nio.charset.Charset.forName("UTF-16LE");

    /**
     * A RefU structure specifying the range of cells if this record is part of an SXTBL.
     * <a href="http://msdn.microsoft.com/en-us/library/dd920420(office.12).aspx">
     * [MS XLS s.2.5.211]</a>
     */
    private int firstRow, lastRow, firstCol, lastCol;

    /**
     * the number of chars in the link
     */
    private int charCount;

    /**
     * the type of characters (single or double byte). Only the lowest bit is significant; the
     * byte-array constructor stores the whole byte as read, the stream constructor stores the
     * masked bit.
     */
    private int charType;

    /**
     * The link's path string. This is the <code>rgb</code> field of a
     * <code>XLUnicodeStringNoCch</code>. Therefore it will contain at least one leading special
     * character (0x01 or 0x02) and probably other ones.<p/>
     * @see <A href="http://msdn.microsoft.com/en-us/library/dd923491(office.12).aspx">
     * DConFile [MS-XLS s. 2.5.77]</A> and
     * <A href="http://msdn.microsoft.com/en-us/library/dd950157(office.12).aspx">
     * VirtualPath [MS-XLS s. 2.5.69]</a>
     * <p/>
     */
    private byte[] path;

    /**
     * unused bits at the end, must be set to 0. Only populated for self-references.
     */
    private byte[] _unused;

    /**
     * Read constructor.
     *
     * @param data byte array containing a DConRef Record, including the header.
     * @throws RecordFormatException if the sid in the header is not {@link #sid} or the
     * character count is smaller than 2.
     */
    public DConRefRecord(byte[] data)
    {
        int offset = 0;
        if (LittleEndian.getShort(data, offset) != DConRefRecord.sid)
        {
            throw new RecordFormatException("incompatible sid.");
        }
        offset += LittleEndian.SHORT_SIZE;

        // the record length field is not needed, skip it.
        offset += LittleEndian.SHORT_SIZE;

        firstRow = LittleEndian.getUShort(data, offset);
        offset += LittleEndian.SHORT_SIZE;
        lastRow = LittleEndian.getUShort(data, offset);
        offset += LittleEndian.SHORT_SIZE;
        firstCol = LittleEndian.getUByte(data, offset);
        offset += LittleEndian.BYTE_SIZE;
        lastCol = LittleEndian.getUByte(data, offset);
        offset += LittleEndian.BYTE_SIZE;

        charCount = LittleEndian.getUShort(data, offset);
        offset += LittleEndian.SHORT_SIZE;
        if (charCount < 2)
        {
            throw new RecordFormatException("Character count must be >= 2");
        }

        charType = LittleEndian.getUByte(data, offset);
        offset += LittleEndian.BYTE_SIZE; //7 bits reserved + 1 bit type

        /*
         * Only the lowest bit of charType selects the character width (0 = single-byte,
         * 1 = double-byte); the remaining bits are reserved and must not influence any length.
         */
        int bytesPerChar = (charType & 1) + 1;
        int byteLength = charCount * bytesPerChar;
        path = LittleEndian.getByteArray(data, offset, byteLength);
        offset += byteLength;

        /*
         * If it's a self reference, the last one or two bytes (depending on char type) are the
         * unused field.
         */
        if (isSelfReference())
        {
            _unused = LittleEndian.getByteArray(data, offset, bytesPerChar);
        }
    }

    /**
     * Read Constructor.
     *
     * @param inStream RecordInputStream containing a DConRefRecord structure.
     * @throws RecordFormatException if the stream's current sid is not {@link #sid}.
     */
    public DConRefRecord(RecordInputStream inStream)
    {
        if (inStream.getSid() != sid)
        {
            throw new RecordFormatException("Wrong sid: " + inStream.getSid());
        }

        firstRow = inStream.readUShort();
        lastRow = inStream.readUShort();
        firstCol = inStream.readUByte();
        lastCol = inStream.readUByte();

        charCount = inStream.readUShort();
        charType = inStream.readUByte() & 0x01; //first bit only.

        // byteLength depends on whether we are using single- or double-byte chars.
        int byteLength = charCount * (charType + 1);
        path = new byte[byteLength];
        inStream.readFully(path);

        // whatever follows the path of a self-reference is the unused field.
        if (isSelfReference())
        {
            _unused = inStream.readRemainder();
        }
    }

    /**
     * @return true if the path starts with the self-reference marker <code>0x02</code>, in which
     * case the record carries the trailing unused field.
     */
    private boolean isSelfReference()
    {
        return path[0] == 0x02;
    }

    /*
     * assuming this wants the number of bytes returned by {@link serialize(LittleEndianOutput)},
     * that is, (length - 4).
     */
    @Override
    protected int getDataSize()
    {
        // 6 bytes ref + 2 bytes cch + 1 byte char type, followed by the path.
        int sz = 9 + path.length;
        if (isSelfReference())
        {
            sz += _unused.length;
        }
        return sz;
    }

    @Override
    protected void serialize(LittleEndianOutput out)
    {
        out.writeShort(firstRow);
        out.writeShort(lastRow);
        out.writeByte(firstCol);
        out.writeByte(lastCol);
        out.writeShort(charCount);
        out.writeByte(charType);
        out.write(path);
        if (isSelfReference())
        {
            out.write(_unused);
        }
    }

    @Override
    public short getSid()
    {
        return sid;
    }

    /**
     * @return The first column of the range.
     */
    public int getFirstColumn()
    {
        return firstCol;
    }

    /**
     * @return The first row of the range.
     */
    public int getFirstRow()
    {
        return firstRow;
    }

    /**
     * @return The last column of the range.
     */
    public int getLastColumn()
    {
        return lastCol;
    }

    /**
     * @return The last row of the range.
     */
    public int getLastRow()
    {
        return lastRow;
    }

    @Override
    public String toString()
    {
        StringBuilder b = new StringBuilder();
        b.append("[DCONREF]\n");
        b.append("    .ref\n");
        b.append("        .firstrow   = ").append(firstRow).append("\n");
        b.append("        .lastrow    = ").append(lastRow).append("\n");
        b.append("        .firstcol   = ").append(firstCol).append("\n");
        b.append("        .lastcol    = ").append(lastCol).append("\n");
        b.append("    .cch            = ").append(charCount).append("\n");
        b.append("    .stFile\n");
        b.append("        .h          = ").append(charType).append("\n");
        b.append("        .rgb        = ").append(getReadablePath()).append("\n");
        b.append("[/DCONREF]\n");
        return b.toString();
    }

    /**
     *
     * @return a copy of the raw path byte array.
     */
    public byte[] getPath()
    {
        return Arrays.copyOf(path, path.length);
    }

    /**
     * Decodes the path according to the character type (ISO-8859-1 for single-byte, UTF-16LE
     * for double-byte strings), drops the leading marker character together with any control
     * characters (below 0x20) that directly follow it, and replaces the 0x03 path separators
     * with <code>'/'</code>.
     * <p/>
     * See MS-XLS 2.5.277 (VirtualPath).
     *
     * @return the link's path, with the special characters stripped/replaced. May be null.
     */
    public String getReadablePath()
    {
        if (path == null)
        {
            return null;
        }

        // Decode first and scan characters afterwards: scanning raw bytes would treat every
        // byte >= 0x80 as negative (and therefore "special") and would split double-byte chars.
        boolean doubleByte = (charType & 1) == 1;
        String decoded = new String(path, doubleByte ? DOUBLE_BYTE_CHARSET : SINGLE_BYTE_CHARSET);

        //all of the path strings start with either 0x02 or 0x01 followed by zero or
        //more of 0x01..0x08
        int start = Math.min(1, decoded.length());
        // the bounds check has to come first so a path made only of special chars is safe.
        while (start < decoded.length() && decoded.charAt(start) < 0x20)
        {
            start++;
        }

        //UNC paths have 0x03 chars as path separators.
        return decoded.substring(start).replace('\u0003', '/');
    }

    /**
     * Checks if the data source in this reference record is external to this sheet or internal.
     *
     * @return true iff this is an external reference.
     */
    public boolean isExternalRef()
    {
        return path[0] == 0x01;
    }
}

View File

@ -129,6 +129,7 @@ public final class RecordFactory {
CRNRecord.class, CRNRecord.class,
DateWindow1904Record.class, DateWindow1904Record.class,
DBCellRecord.class, DBCellRecord.class,
DConRefRecord.class,
DefaultColWidthRecord.class, DefaultColWidthRecord.class,
DefaultRowHeightRecord.class, DefaultRowHeightRecord.class,
DeltaRecord.class, DeltaRecord.class,

View File

@ -96,6 +96,7 @@ public final class AllRecordTests {
result.addTestSuite(TestUnicodeNameRecord.class); result.addTestSuite(TestUnicodeNameRecord.class);
result.addTestSuite(TestUnicodeString.class); result.addTestSuite(TestUnicodeString.class);
result.addTestSuite(TestWriteAccessRecord.class); result.addTestSuite(TestWriteAccessRecord.class);
result.addTestSuite(TestDConRefRecord.class);
return result; return result;
} }
} }

View File

@ -0,0 +1,310 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.poi.hssf.record;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import junit.framework.TestCase;
import org.apache.poi.hssf.record.RecordInputStream;
import org.apache.poi.hssf.record.TestcaseRecordInputStream;
import org.apache.poi.util.LittleEndianOutputStream;
//import static org.junit.Assert.assertArrayEquals;
//import org.junit.Test;
import junit.framework.Assert;
/**
 * Unit tests for DConRefRecord class.
 *
 * @author Niklas Rehfeld
 */
public class TestDConRefRecord extends TestCase
{
    /**
     * record of a proper single-byte external 'volume'-style path with multiple parts and a sheet
     * name.
     */
    final byte[] volumeString = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        17, 0,//cchFile (2 bytes)
        0, //char type
        1, 1, 'c', '[', 'f', 'o', 'o', 0x3,
        'b', 'a', 'r', ']', 's', 'h', 'e', 'e',
        't'
    };
    /**
     * record of a proper single-byte external 'unc-volume'-style path with multiple parts and a
     * sheet name.
     */
    final byte[] uncVolumeString = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        34, 0,//cchFile (2 bytes)
        0, //char type
        1, 1, '@', '[', 'c', 'o', 'm', 'p',
        0x3, 's', 'h', 'a', 'r', 'e', 'd', 0x3,
        'r', 'e', 'l', 'a', 't', 'i', 'v', 'e',
        0x3, 'f', 'o', 'o', ']', 's', 'h', 'e',
        'e', 't'
    };
    /**
     * record of a proper single-byte external 'simple-file-path-dcon' style path with a sheet name.
     */
    final byte[] simpleFilePathDconString = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        16, 0,//cchFile (2 bytes)
        0, //char type
        1, 'c', '[', 'f', 'o', 'o', 0x3, 'b',
        'a', 'r', ']', 's', 'h', 'e', 'e', 't'
    };
    /**
     * record of a proper 'transfer-protocol'-style path. This one has a sheet name at the end, and
     * another one inside the file path. The spec doesn't seem to care about what they are.
     */
    final byte[] transferProtocolString = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        33, 0,//cchFile (2 bytes)
        0, //char type
        0x1, 0x5, 30, //count = 30
        '[', 'h', 't', 't', 'p', ':', '/', '/',
        '[', 'f', 'o', 'o', 0x3, 'b', 'a', 'r',
        ']', 's', 'h', 'e', 'e', 't', '1', ']',
        's', 'h', 'e', 'e', 't', 'x'
    };
    /**
     * rel-volume-type path.
     */
    final byte[] relVolumeString = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        16, 0,//cchFile (2 bytes)
        0, //char type
        0x1, 0x2, '[', 'f', 'o', 'o', 0x3, 'b',
        'a', 'r', ']', 's', 'h', 'e', 'e', 't'
    };
    /**
     * startup-type path.
     */
    final byte[] startupString = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        16, 0,//cchFile (2 bytes)
        0, //char type
        0x1, 0x6, '[', 'f', 'o', 'o', 0x3, 'b',
        'a', 'r', ']', 's', 'h', 'e', 'e', 't'
    };
    /**
     * alt-startup-type path.
     */
    final byte[] altStartupString = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        16, 0,//cchFile (2 bytes)
        0, //char type
        0x1, 0x7, '[', 'f', 'o', 'o', 0x3, 'b',
        'a', 'r', ']', 's', 'h', 'e', 'e', 't'
    };
    /**
     * library-style path.
     */
    final byte[] libraryString = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        16, 0,//cchFile (2 bytes)
        0, //char type
        0x1, 0x8, '[', 'f', 'o', 'o', 0x3, 'b',
        'a', 'r', ']', 's', 'h', 'e', 'e', 't'
    };
    /**
     * record of single-byte string, external, volume path.
     */
    final byte[] data1 = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        10, 0,//cchFile (2 bytes)
        0, //char type
        1, 1, (byte) 'b', (byte) 'l', (byte) 'a', (byte) ' ', (byte) 't',
        (byte) 'e', (byte) 's', (byte) 't'
        //no unused field, as the path does not start with 0x02
    };
    /**
     * record of double-byte string, self-reference. The path is 9 characters, i.e. 18 bytes,
     * each character being stored low byte first.
     */
    final byte[] data2 = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        9, 0,//cchFile (2 bytes)
        1, //char type = unicode
        2, 0, (byte) 'b', 0, (byte) 'l', 0, (byte) 'a', 0, (byte) ' ', 0, (byte) 't', 0,
        (byte) 'e', 0, (byte) 's', 0, (byte) 't', 0,//stFile
        0, 0 //unused (2 bytes as we're using double-byte chars)
    };
    /**
     * record of single-byte string, self-reference.
     */
    final byte[] data3 = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        9, 0,//cchFile (2 bytes)
        0, //char type = ansi
        2, (byte) 'b', (byte) 'l', (byte) 'a', (byte) ' ', (byte) 't', (byte) 'e', (byte) 's',
        (byte) 't',//stFile
        0 //unused (1 byte as we're using single bytes)
    };
    /**
     * double-byte string, external reference, unc-volume.
     */
    final byte[] data4 = new byte[]
    {
        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
        16, 0,//cchFile (2 bytes)
        //stFile starts here:
        1, //char type = unicode
        1, 0, 1, 0, 0x40, 0, (byte) 'c', 0, (byte) 'o', 0, (byte) 'm', 0, (byte) 'p', 0, 0x03, 0,
        (byte) 'b', 0, (byte) 'l', 0, (byte) 'a', 0, 0x03, 0, (byte) 't', 0, (byte) 'e', 0,
        (byte) 's', 0, (byte) 't', 0,
        //no unused field, as the path does not start with 0x02
    };

    /**
     * test read-constructor-then-serialize for a single-byte external reference strings of
     * various flavours. This uses the RecordInputStream constructor.
     * @throws IOException
     */
    public void testReadWriteSBExtRef() throws IOException
    {
        testReadWrite(data1, "read-write single-byte external reference, volume type path");
        testReadWrite(volumeString,
                "read-write properly formed single-byte external reference, volume type path");
        testReadWrite(uncVolumeString,
                "read-write properly formed single-byte external reference, UNC volume type path");
        testReadWrite(relVolumeString,
                "read-write properly formed single-byte external reference, rel-volume type path");
        testReadWrite(simpleFilePathDconString,
                "read-write properly formed single-byte external reference, simple-file-path-dcon type path");
        testReadWrite(transferProtocolString,
                "read-write properly formed single-byte external reference, transfer-protocol type path");
        testReadWrite(startupString,
                "read-write properly formed single-byte external reference, startup type path");
        testReadWrite(altStartupString,
                "read-write properly formed single-byte external reference, alt-startup type path");
        testReadWrite(libraryString,
                "read-write properly formed single-byte external reference, library type path");
    }

    /**
     * test read-constructor-then-serialize for a double-byte external reference 'UNC-Volume' style
     * string
     * <p/>
     * @throws IOException
     */
    public void testReadWriteDBExtRefUncVol() throws IOException
    {
        testReadWrite(data4, "read-write double-byte external reference, UNC volume type path");
    }

    /**
     * Reads a record from the given record body, serializes it again and asserts that the
     * serialized bytes equal the input. Private, so JUnit does not pick it up as a test itself.
     *
     * @param data record body (without the 4 byte header).
     * @param message assertion message.
     * @throws IOException
     */
    private void testReadWrite(byte[] data, String message) throws IOException
    {
        RecordInputStream is = TestcaseRecordInputStream.create(81, data);
        DConRefRecord d = new DConRefRecord(is);
        ByteArrayOutputStream bos = new ByteArrayOutputStream(data.length);
        LittleEndianOutputStream o = new LittleEndianOutputStream(bos);
        d.serialize(o);
        o.flush();

        assertTrue(message, Arrays.equals(data, bos.toByteArray()));
    }

    /**
     * test read-constructor-then-serialize for a double-byte self-reference style string
     * <p/>
     * @throws IOException
     */
    public void testReadWriteDBSelfRef() throws IOException
    {
        testReadWrite(data2, "read-write double-byte self reference");
    }

    /**
     * test read-constructor-then-serialize for a single-byte self-reference style string
     * <p/>
     * @throws IOException
     */
    public void testReadWriteSBSelfRef() throws IOException
    {
        testReadWrite(data3, "read-write single byte self reference");
    }

    /**
     * Test of getDataSize method, of class DConRefRecord.
     */
    public void testGetDataSize()
    {
        DConRefRecord instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data1));
        int expResult = data1.length;
        int result = instance.getDataSize();
        assertEquals("single byte external reference, volume type path data size", expResult, result);

        instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data2));
        assertEquals("double byte self reference data size", data2.length, instance.getDataSize());

        instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data3));
        assertEquals("single byte self reference data size", data3.length, instance.getDataSize());

        instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data4));
        assertEquals("double byte external reference, UNC volume type path data size", data4.length,
                instance.getDataSize());
    }

    /**
     * Test of getSid method, of class DConRefRecord.
     */
    public void testGetSid()
    {
        DConRefRecord instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data1));
        short expResult = 81;
        short result = instance.getSid();
        assertEquals("SID", expResult, result);
    }

    /**
     * Test of getPath method, of class DConRefRecord.
     * @todo different types of paths.
     */
    public void testGetPath()
    {
        DConRefRecord instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data1));
        // the path starts after ref (6 bytes), cchFile (2 bytes) and the char type (1 byte).
        byte[] expResult = Arrays.copyOfRange(data1, 9, data1.length);
        byte[] result = instance.getPath();
        assertTrue("get path", Arrays.equals(expResult, result));
    }

    /**
     * Test of getReadablePath method, of class DConRefRecord, for single-byte ASCII paths: the
     * leading marker and control characters are dropped and 0x03 separators become '/'.
     */
    public void testGetReadablePath()
    {
        DConRefRecord instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data1));
        assertEquals("readable path, external volume", "bla test", instance.getReadablePath());

        instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data3));
        assertEquals("readable path, self reference", "bla test", instance.getReadablePath());

        instance = new DConRefRecord(TestcaseRecordInputStream.create(81, volumeString));
        assertEquals("readable path, volume with separator", "c[foo/bar]sheet",
                instance.getReadablePath());
    }

    /**
     * Test of isExternalRef method, of class DConRefRecord.
     */
    public void testIsExternalRef()
    {
        DConRefRecord instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data1));
        assertTrue("external reference", instance.isExternalRef());

        instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data2));
        assertFalse("internal reference", instance.isExternalRef());
    }
}