From dd9c547ea37f4eb32917a7217eacee558476db8f Mon Sep 17 00:00:00 2001 From: Yegor Kozlov Date: Tue, 7 Feb 2012 09:00:47 +0000 Subject: [PATCH] Bugzilla 52569: Support DConRefRecord in HSSF git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1241380 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../org/apache/poi/hssf/dev/BiffViewer.java | 1 + .../apache/poi/hssf/record/DConRefRecord.java | 313 ++++++++++++++++++ .../apache/poi/hssf/record/RecordFactory.java | 1 + .../poi/hssf/record/AllRecordTests.java | 1 + .../poi/hssf/record/TestDConRefRecord.java | 310 +++++++++++++++++ 6 files changed, 627 insertions(+) create mode 100755 src/java/org/apache/poi/hssf/record/DConRefRecord.java create mode 100644 src/testcases/org/apache/poi/hssf/record/TestDConRefRecord.java diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index b3f0f1182f..9d87903135 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 52569 - Support DConRefRecord in HSSF 52575 - added an option to ignore missing workbook references in formula evaluator Validate address of hyperlinks in XSSF 52540 - Relax the M4.1 constraint on reading OOXML files, as some Office produced ones do have 2 Core Properties, despite the specification explicitly forbidding this diff --git a/src/java/org/apache/poi/hssf/dev/BiffViewer.java b/src/java/org/apache/poi/hssf/dev/BiffViewer.java index ad0de2e84b..230b19e37f 100644 --- a/src/java/org/apache/poi/hssf/dev/BiffViewer.java +++ b/src/java/org/apache/poi/hssf/dev/BiffViewer.java @@ -148,6 +148,7 @@ public final class BiffViewer { case DatRecord.sid: return new DatRecord(in); case DataFormatRecord.sid: return new DataFormatRecord(in); case DateWindow1904Record.sid: return new DateWindow1904Record(in); + case DConRefRecord.sid: return new DConRefRecord(in); case DefaultColWidthRecord.sid:return new 
DefaultColWidthRecord(in); case DefaultDataLabelTextPropertiesRecord.sid: return new DefaultDataLabelTextPropertiesRecord(in); case DefaultRowHeightRecord.sid: return new DefaultRowHeightRecord(in); diff --git a/src/java/org/apache/poi/hssf/record/DConRefRecord.java b/src/java/org/apache/poi/hssf/record/DConRefRecord.java new file mode 100755 index 0000000000..08ba314edb --- /dev/null +++ b/src/java/org/apache/poi/hssf/record/DConRefRecord.java @@ -0,0 +1,313 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ==================================================================== + */ +package org.apache.poi.hssf.record; + +import java.util.Arrays; +import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.LittleEndianOutput; + +/** + * DConRef records specify a range in a workbook (internal or external) that serves as a data source + * for pivot tables or data consolidation. + * + * Represents a DConRef Structure + * [MS-XLS s. + * 2.4.86], and the contained DConFile structure + * + * [MS-XLS s. 2.5.69]. This in turn contains a XLUnicodeStringNoCch + * + * [MS-XLS s. 2.5.296]. + * + *
+ *         _______________________________
+ *        |          DConRef              |
+ *(bytes) +-+-+-+-+-+-+-+-+-+-+...+-+-+-+-+
+ *        |    ref    |cch|  stFile   | un|
+ *        +-+-+-+-+-+-+-+-+-+-+...+-+-+-+-+
+ *                              |
+ *                     _________|_____________________
+ *                    |DConFile / XLUnicodeStringNoCch|
+ *                    +-+-+-+-+-+-+-+-+-+-+-+...+-+-+-+
+ *             (bits) |h|   reserved  |      rgb      |
+ *                    +-+-+-+-+-+-+-+-+-+-+-+...+-+-+-+
+ * 
+ * Where + *
    + *
  • DConFile.h = 0x00 if the characters in rgb are single byte, and + * DConFile.h = 0x01 if they are double byte.
    + * If they are double byte, then
    + *
      + *
    • If it exists, the length of DConRef.un = 2. Otherwise it is 1. + *
    • The length of DConFile.rgb = (2 * DConRef.cch). Otherwise it is equal to + * DConRef.cch. + *
    + *
  • DConFile.rgb starts with 0x01 if it is an external reference, + * and with 0x02 if it is a self-reference. + *
+ *
+ * At the moment this class is read-only: it has no setters, although a record that has been read
+ * can be serialized again.
+ *
+ * @author Niklas Rehfeld
+ */
+public class DConRefRecord extends StandardRecord
+{
+
+    /**
+     * The id of the record type,
+     * sid = {@value}
+     */
+    public static final short sid = 0x0051;
+    /**
+     * A RefU structure specifying the range of cells if this record is part of an SXTBL.
+     *
+     * [MS XLS s.2.5.211]
+     */
+    private int firstRow, lastRow, firstCol, lastCol;
+    /**
+     * the number of chars in the link
+     */
+    private int charCount;
+    /**
+     * the type of characters: 0 if they are single-byte, 1 if they are double-byte
+     */
+    private int charType;
+    /**
+     * The link's path string. This is the rgb field of a
+     * XLUnicodeStringNoCch. Therefore it will contain at least one leading special
+     * character (0x01 or 0x02) and probably other ones.
+     *
+     * @see "DConFile [MS-XLS s. 2.5.77] and VirtualPath [MS-XLS s. 2.5.69]"
+     */
+    private byte[] path;
+    /**
+     * unused bytes at the end, must be set to 0. Only read (and written) when the path is a
+     * self-reference, i.e. when it starts with 0x02.
+     */
+    private byte[] _unused;
+
+    /**
+     * Read constructor.
+     *
+     * @param data byte array containing a DConRef Record, including the header.
+     * @throws RecordFormatException if the header does not carry {@link #sid} or if the character
+     * count is smaller than 2.
+     */
+    public DConRefRecord(byte[] data)
+    {
+        int offset = 0;
+        if (LittleEndian.getShort(data, offset) != DConRefRecord.sid)
+        {
+            throw new RecordFormatException("incompatible sid.");
+        }
+        offset += LittleEndian.SHORT_SIZE;
+
+        // the record length stored in the header is not needed, skip over it.
+        offset += LittleEndian.SHORT_SIZE;
+
+        firstRow = LittleEndian.getUShort(data, offset);
+        offset += LittleEndian.SHORT_SIZE;
+        lastRow = LittleEndian.getUShort(data, offset);
+        offset += LittleEndian.SHORT_SIZE;
+        firstCol = LittleEndian.getUByte(data, offset);
+        offset += LittleEndian.BYTE_SIZE;
+        lastCol = LittleEndian.getUByte(data, offset);
+        offset += LittleEndian.BYTE_SIZE;
+        charCount = LittleEndian.getUShort(data, offset);
+        offset += LittleEndian.SHORT_SIZE;
+        if (charCount < 2)
+        {
+            throw new RecordFormatException("Character count must be >= 2");
+        }
+
+        // 7 bits reserved + 1 bit type. Only the lowest bit is kept (as in the stream
+        // constructor) so that set reserved bits cannot inflate the lengths computed below.
+        charType = LittleEndian.getUByte(data, offset) & 0x01;
+        offset += LittleEndian.BYTE_SIZE;
+
+        /*
+         * byteLength is the length of the string in bytes, which depends on whether the string is
+         * made of single- or double-byte chars. This is given by charType, which equals 0 if
+         * single-byte, 1 if double-byte.
+         */
+        int byteLength = charCount * (charType + 1);
+
+        path = LittleEndian.getByteArray(data, offset, byteLength);
+        offset += byteLength;
+
+        /*
+         * If it's a self reference, the last one or two bytes (depending on char type) are the
+         * unused field.
+         */
+        if (path[0] == 0x02)
+        {
+            _unused = LittleEndian.getByteArray(data, offset, (charType + 1));
+        }
+    }
+
+    /**
+     * Read Constructor.
+     *
+     * @param inStream RecordInputStream containing a DConRefRecord structure.
+     * @throws RecordFormatException if the stream is positioned on a record with a different sid
+     * or if the character count is smaller than 2.
+     */
+    public DConRefRecord(RecordInputStream inStream)
+    {
+        if (inStream.getSid() != sid)
+        {
+            throw new RecordFormatException("Wrong sid: " + inStream.getSid());
+        }
+
+        firstRow = inStream.readUShort();
+        lastRow = inStream.readUShort();
+        firstCol = inStream.readUByte();
+        lastCol = inStream.readUByte();
+
+        charCount = inStream.readUShort();
+        // same rule as the byte[] constructor; without it an empty path would fail further down
+        // with an ArrayIndexOutOfBoundsException instead of a RecordFormatException.
+        if (charCount < 2)
+        {
+            throw new RecordFormatException("Character count must be >= 2");
+        }
+        charType = inStream.readUByte() & 0x01; //first bit only.
+
+        // byteLength depends on whether we are using single- or double-byte chars.
+        int byteLength = charCount * (charType + 1);
+
+        path = new byte[byteLength];
+        inStream.readFully(path);
+
+        // a self-reference is followed by the unused field: take whatever is left in the record.
+        if (path[0] == 0x02)
+        {
+            _unused = inStream.readRemainder();
+        }
+    }
+
+    /**
+     * @return the number of bytes written by {@link #serialize(LittleEndianOutput)}, that is the
+     * record length without the 4 byte header.
+     */
+    @Override
+    protected int getDataSize()
+    {
+        // ref (6 bytes) + cch (2 bytes) + character type (1 byte) + the path itself
+        int sz = 9 + path.length;
+        if (path[0] == 0x02)
+        {
+            sz += _unused.length;
+        }
+        return sz;
+    }
+
+    /**
+     * Writes the record body in the same order in which the read constructors consume it.
+     *
+     * @param out the output to write to.
+     */
+    @Override
+    protected void serialize(LittleEndianOutput out)
+    {
+        out.writeShort(firstRow);
+        out.writeShort(lastRow);
+        out.writeByte(firstCol);
+        out.writeByte(lastCol);
+        out.writeShort(charCount);
+        out.writeByte(charType);
+        out.write(path);
+        if (path[0] == 0x02)
+        {
+            out.write(_unused);
+        }
+    }
+
+    @Override
+    public short getSid()
+    {
+        return sid;
+    }
+
+    /**
+     * @return The first column of the range.
+     */
+    public int getFirstColumn()
+    {
+        return firstCol;
+    }
+
+    /**
+     * @return The first row of the range.
+     */
+    public int getFirstRow()
+    {
+        return firstRow;
+    }
+
+    /**
+     * @return The last column of the range.
+     */
+    public int getLastColumn()
+    {
+        return lastCol;
+    }
+
+    /**
+     * @return The last row of the range.
+     */
+    public int getLastRow()
+    {
+        return lastRow;
+    }
+
+    @Override
+    public String toString()
+    {
+        StringBuilder b = new StringBuilder();
+        b.append("[DCONREF]\n");
+        b.append(" .ref\n");
+        b.append(" .firstrow = ").append(firstRow).append("\n");
+        b.append(" .lastrow = ").append(lastRow).append("\n");
+        b.append(" .firstcol = ").append(firstCol).append("\n");
+        b.append(" .lastcol = ").append(lastCol).append("\n");
+        b.append(" .cch = ").append(charCount).append("\n");
+        b.append(" .stFile\n");
+        b.append(" .h = ").append(charType).append("\n");
+        b.append(" .rgb = ").append(getReadablePath()).append("\n");
+        b.append("[/DCONREF]\n");
+
+        return b.toString();
+    }
+
+    /**
+     *
+     * @return a copy of the raw path byte array.
+     */
+    public byte[] getPath()
+    {
+        return Arrays.copyOf(path, path.length);
+    }
+
+    /**
+     * Decodes the path according to the character type of this record, drops the leading special
+     * characters and replaces the 0x03 path separators with '/'.
+     *
+     * @return the link's path, with the special characters stripped/replaced. May be null.
+     * @see "MS-XLS 2.5.277 (VirtualPath)"
+     */
+    public String getReadablePath()
+    {
+        if (path == null)
+        {
+            return null;
+        }
+        // double-byte strings are little-endian UTF-16. Single-byte strings store only the low
+        // byte of each character, which is exactly what ISO-8859-1 decodes.
+        String encoding = (charType & 0x01) == 0 ? "ISO-8859-1" : "UTF-16LE";
+        String decoded = new String(path, java.nio.charset.Charset.forName(encoding));
+
+        //all of the path strings start with either 0x02 or 0x01 followed by zero or
+        //more of 0x01..0x08
+        int start = Math.min(1, decoded.length());
+        // the bounds check has to come first: a path may consist of special characters only.
+        while (start < decoded.length() && decoded.charAt(start) < 0x20)
+        {
+            start++;
+        }
+        //UNC paths have \u0003 chars as path separators.
+        return decoded.substring(start).replace('\u0003', '/');
+    }
+
+    /**
+     * Checks if the data source in this reference record is external to this sheet or internal.
+     *
+     * @return true iff this is an external reference.
+     */
+    public boolean isExternalRef()
+    {
+        return path[0] == 0x01;
+    }
+}
diff --git a/src/java/org/apache/poi/hssf/record/RecordFactory.java b/src/java/org/apache/poi/hssf/record/RecordFactory.java
index 68c97bd7da..2db4996db2 100644
--- a/src/java/org/apache/poi/hssf/record/RecordFactory.java
+++ b/src/java/org/apache/poi/hssf/record/RecordFactory.java
@@ -129,6 +129,7 @@ public final class RecordFactory {
         CRNRecord.class,
         DateWindow1904Record.class,
         DBCellRecord.class,
+        DConRefRecord.class,
         DefaultColWidthRecord.class,
         DefaultRowHeightRecord.class,
         DeltaRecord.class,
diff --git a/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java b/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java
index b698a0fed0..831cb9d378 100644
--- a/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java
+++ b/src/testcases/org/apache/poi/hssf/record/AllRecordTests.java
@@ -96,6 +96,7 @@ public final class AllRecordTests {
         result.addTestSuite(TestUnicodeNameRecord.class);
         result.addTestSuite(TestUnicodeString.class);
         result.addTestSuite(TestWriteAccessRecord.class);
+        result.addTestSuite(TestDConRefRecord.class);
         return result;
     }
 }
diff --git a/src/testcases/org/apache/poi/hssf/record/TestDConRefRecord.java b/src/testcases/org/apache/poi/hssf/record/TestDConRefRecord.java
new file mode 100644
index 0000000000..82074549ec
--- /dev/null
+++ b/src/testcases/org/apache/poi/hssf/record/TestDConRefRecord.java
@@ -0,0 +1,310 @@
+/*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.poi.hssf.record;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import junit.framework.TestCase;
+import org.apache.poi.hssf.record.RecordInputStream;
+import org.apache.poi.hssf.record.TestcaseRecordInputStream;
+import org.apache.poi.util.LittleEndianOutputStream;
+//import static org.junit.Assert.assertArrayEquals;
+//import org.junit.Test;
+import junit.framework.Assert;
+
+/**
+ * Unit tests for DConRefRecord class.
+ *
+ * All byte arrays below hold the record body only (no sid / length header): 6 bytes of ref,
+ * 2 bytes of character count, 1 byte of character type, then the path and, for self-references,
+ * the unused field.
+ *
+ * @author Niklas Rehfeld
+ */
+public class TestDConRefRecord extends TestCase
+{
+    /**
+     * record of a proper single-byte external 'volume'-style path with multiple parts and a sheet
+     * name.
+     */
+    final byte[] volumeString = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        17, 0,//cchFile (2 bytes)
+        0, //char type
+        1, 1, 'c', '[', 'f', 'o', 'o', 0x3,
+        'b', 'a', 'r', ']', 's', 'h', 'e', 'e',
+        't'
+    };
+    /**
+     * record of a proper single-byte external 'unc-volume'-style path with multiple parts and a
+     * sheet name.
+     */
+    final byte[] uncVolumeString = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        34, 0,//cchFile (2 bytes)
+        0, //char type
+        1, 1, '@', '[', 'c', 'o', 'm', 'p',
+        0x3, 's', 'h', 'a', 'r', 'e', 'd', 0x3,
+        'r', 'e', 'l', 'a', 't', 'i', 'v', 'e',
+        0x3, 'f', 'o', 'o', ']', 's', 'h', 'e',
+        'e', 't'
+    };
+    /**
+     * record of a proper single-byte external 'simple-file-path-dcon' style path with a sheet name.
+     */
+    final byte[] simpleFilePathDconString = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        16, 0,//cchFile (2 bytes)
+        0, //char type
+        1, 'c', '[', 'f', 'o', 'o', 0x3, 'b',
+        'a', 'r', ']', 's', 'h', 'e', 'e', 't'
+    };
+    /**
+     * record of a proper 'transfer-protocol'-style path. This one has a sheet name at the end, and
+     * another one inside the file path. The spec doesn't seem to care about what they are.
+     */
+    final byte[] transferProtocolString = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        33, 0,//cchFile (2 bytes)
+        0, //char type
+        0x1, 0x5, 30, //count = 30
+        '[', 'h', 't', 't', 'p', ':', '/', '/',
+        '[', 'f', 'o', 'o', 0x3, 'b', 'a', 'r',
+        ']', 's', 'h', 'e', 'e', 't', '1', ']',
+        's', 'h', 'e', 'e', 't', 'x'
+    };
+    /**
+     * rel-volume-type path.
+     */
+    final byte[] relVolumeString = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        16, 0,//cchFile (2 bytes)
+        0, //char type
+        0x1, 0x2, '[', 'f', 'o', 'o', 0x3, 'b',
+        'a', 'r', ']', 's', 'h', 'e', 'e', 't'
+    };
+    /**
+     * startup-type path.
+     */
+    final byte[] startupString = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        16, 0,//cchFile (2 bytes)
+        0, //char type
+        0x1, 0x6, '[', 'f', 'o', 'o', 0x3, 'b',
+        'a', 'r', ']', 's', 'h', 'e', 'e', 't'
+    };
+    /**
+     * alt-startup-type path.
+     */
+    final byte[] altStartupString = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        16, 0,//cchFile (2 bytes)
+        0, //char type
+        0x1, 0x7, '[', 'f', 'o', 'o', 0x3, 'b',
+        'a', 'r', ']', 's', 'h', 'e', 'e', 't'
+    };
+    /**
+     * library-style path.
+     */
+    final byte[] libraryString = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        16, 0,//cchFile (2 bytes)
+        0, //char type
+        0x1, 0x8, '[', 'f', 'o', 'o', 0x3, 'b',
+        'a', 'r', ']', 's', 'h', 'e', 'e', 't'
+    };
+    /**
+     * record of single-byte string, external, volume path.
+     */
+    final byte[] data1 = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        10, 0,//cchFile (2 bytes)
+        0, //char type
+        1, 1, (byte) 'b', (byte) 'l', (byte) 'a', (byte) ' ', (byte) 't',
+        (byte) 'e', (byte) 's', (byte) 't'
+        //unused doesn't exist as stFile[1] != 2
+    };
+    /**
+     * record of double-byte string, self-reference.
+     */
+    final byte[] data2 = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        9, 0,//cchFile (2 bytes)
+        1, //char type = unicode
+        // 9 double-byte chars = 18 bytes; every char, including 's', needs its high byte.
+        2, 0, (byte) 'b', 0, (byte) 'l', 0, (byte) 'a', 0, (byte) ' ', 0, (byte) 't', 0,
+        (byte) 'e', 0, (byte) 's', 0, (byte) 't', 0,//stFile
+        0, 0 //unused (2 bytes as we're using double-byte chars)
+    };
+    /**
+     * record of single-byte string, self-reference.
+     */
+    final byte[] data3 = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        9, 0,//cchFile (2 bytes)
+        0, //char type = ansi
+        2, (byte) 'b', (byte) 'l', (byte) 'a', (byte) ' ', (byte) 't', (byte) 'e', (byte) 's',
+        (byte) 't',//stFile
+        0 //unused (1 byte as we're using single bytes)
+    };
+    /**
+     * double-byte string, external reference, unc-volume.
+     */
+    final byte[] data4 = new byte[]
+    {
+        0, 0, 0, 0, 0, 0, //ref (6 bytes) not used...
+        16, 0,//cchFile (2 bytes)
+        //stFile starts here:
+        1, //char type = unicode
+        1, 0, 1, 0, 0x40, 0, (byte) 'c', 0, (byte) 'o', 0, (byte) 'm', 0, (byte) 'p', 0, 0x03, 0,
+        (byte) 'b', 0, (byte) 'l', 0, (byte) 'a', 0, 0x03, 0, (byte) 't', 0, (byte) 'e', 0,
+        (byte) 's', 0, (byte) 't', 0,
+        //unused doesn't exist as stFile[1] != 2
+    };
+
+    /**
+     * test read-constructor-then-serialize for a single-byte external reference strings of
+     * various flavours. This uses the RecordInputStream constructor.
+     *
+     * @throws IOException
+     */
+    public void testReadWriteSBExtRef() throws IOException
+    {
+        testReadWrite(data1, "read-write single-byte external reference, volume type path");
+        testReadWrite(volumeString,
+                "read-write properly formed single-byte external reference, volume type path");
+        testReadWrite(uncVolumeString,
+                "read-write properly formed single-byte external reference, UNC volume type path");
+        testReadWrite(relVolumeString,
+                "read-write properly formed single-byte external reference, rel-volume type path");
+        testReadWrite(simpleFilePathDconString,
+                "read-write properly formed single-byte external reference, simple-file-path-dcon type path");
+        testReadWrite(transferProtocolString,
+                "read-write properly formed single-byte external reference, transfer-protocol type path");
+        testReadWrite(startupString,
+                "read-write properly formed single-byte external reference, startup type path");
+        testReadWrite(altStartupString,
+                "read-write properly formed single-byte external reference, alt-startup type path");
+        testReadWrite(libraryString,
+                "read-write properly formed single-byte external reference, library type path");
+    }
+
+    /**
+     * test read-constructor-then-serialize for a double-byte external reference 'UNC-Volume' style
+     * string
+     *
+     * @throws IOException
+     */
+    public void testReadWriteDBExtRefUncVol() throws IOException
+    {
+        testReadWrite(data4, "read-write double-byte external reference, UNC volume type path");
+    }
+
+    /**
+     * Reads a record from the given body with the RecordInputStream constructor, serializes it
+     * again and checks that the written bytes equal the input.
+     *
+     * @param data the record body (without sid / length header).
+     * @param message the assertion message.
+     * @throws IOException
+     */
+    private void testReadWrite(byte[] data, String message) throws IOException
+    {
+        RecordInputStream is = TestcaseRecordInputStream.create(81, data);
+        DConRefRecord d = new DConRefRecord(is);
+        ByteArrayOutputStream bos = new ByteArrayOutputStream(data.length);
+        LittleEndianOutputStream o = new LittleEndianOutputStream(bos);
+        d.serialize(o);
+        o.flush();
+
+        assertTrue(message, Arrays.equals(data,
+                bos.toByteArray()));
+    }
+
+    /**
+     * test read-constructor-then-serialize for a double-byte self-reference style string
+     *
+     * @throws IOException
+     */
+    public void testReadWriteDBSelfRef() throws IOException
+    {
+        testReadWrite(data2, "read-write double-byte self reference");
+    }
+
+    /**
+     * test read-constructor-then-serialize for a single-byte self-reference style string
+     *
+     * @throws IOException
+     */
+    public void testReadWriteSBSelfRef() throws IOException
+    {
+        testReadWrite(data3, "read-write single byte self reference");
+    }
+
+    /**
+     * Test of getDataSize method, of class DConRefRecord.
+     */
+    public void testGetDataSize()
+    {
+        DConRefRecord instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data1));
+        int expResult = data1.length;
+        int result = instance.getDataSize();
+        assertEquals("single byte external reference, volume type path data size", expResult, result);
+        instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data2));
+        assertEquals("double byte self reference data size", data2.length, instance.getDataSize());
+        instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data3));
+        assertEquals("single byte self reference data size", data3.length, instance.getDataSize());
+        instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data4));
+        assertEquals("double byte external reference, UNC volume type path data size", data4.length,
+                instance.getDataSize());
+    }
+
+    /**
+     * Test of getSid method, of class DConRefRecord.
+     */
+    public void testGetSid()
+    {
+        DConRefRecord instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data1));
+        short expResult = 81;
+        short result = instance.getSid();
+        assertEquals("SID", expResult, result);
+    }
+
+    /**
+     * Test of getPath method, of class DConRefRecord.
+     * @todo different types of paths.
+     */
+    public void testGetPath()
+    {
+        DConRefRecord instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data1));
+        // the path starts after ref (6 bytes), cchFile (2 bytes) and the char type (1 byte)
+        byte[] expResult = Arrays.copyOfRange(data1, 9, data1.length);
+        byte[] result = instance.getPath();
+        assertTrue("get path", Arrays.equals(expResult, result));
+    }
+
+    /**
+     * Test of getReadablePath method, of class DConRefRecord, for single-byte external paths: the
+     * leading special characters are dropped and the 0x03 separators become '/'.
+     */
+    public void testGetReadablePath()
+    {
+        DConRefRecord instance =
+                new DConRefRecord(TestcaseRecordInputStream.create(81, volumeString));
+        assertEquals("volume type path", "c[foo/bar]sheet", instance.getReadablePath());
+        instance = new DConRefRecord(TestcaseRecordInputStream.create(81, uncVolumeString));
+        assertEquals("UNC volume type path", "@[comp/shared/relative/foo]sheet",
+                instance.getReadablePath());
+    }
+
+    /**
+     * Test of isExternalRef method, of class DConRefRecord.
+     */
+    public void testIsExternalRef()
+    {
+        DConRefRecord instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data1));
+        assertTrue("external reference", instance.isExternalRef());
+        instance = new DConRefRecord(TestcaseRecordInputStream.create(81, data2));
+        assertFalse("internal reference", instance.isExternalRef());
+    }
+}