HPSF: Reading Unicode properties is supported now.

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@352933 13f79535-47bb-0310-9956-ffa450edef68
2002-12-10 06:15:20 +00:00 · 2002-12-10 06:15:20 +00:00 · cb45ae088b
parent 24b1c882e3
commit cb45ae088b
11 changed files with 825 additions and 399 deletions
--- a/src/documentation/xdocs/hpsf/internals.xml
+++ b/src/documentation/xdocs/hpsf/internals.xml
@ -690,6 +690,19 @@
 	<th><p>Property ID string</p></th>
 	<th><p>VT type</p></th>
       </tr>
+
+       <tr>
+	<td><p>0</p></td>
+	<td><p>Dictionary</p></td>
+	<td><p>PID_DICTIONARY</p></td>
+	<td><p>[Special format]</p></td>
+       </tr>
+       <tr>
+	<td><p>1</p></td>
+	<td><p>Code page</p></td>
+	<td><p>PID_CODEPAGE</p></td>
+	<td><p>VT_I2</p></td>
+       </tr>
       <tr>
 	<td><p>2</p></td>
 	<td><p>Category</p></td>
@ -1128,20 +1141,25 @@
    </li>

    <li>
-     <p>What is a <code>FILETIME</code>? The answer can be found for example under
-      <link href="http://www.vbapi.com/ref/f/filetime.html">http://www.vbapi.com/ref/f/filetime.html</link>
+     <p>What is a <code>FILETIME</code>? The answer can be found for example
+      under <link href="http://www.vbapi.com/ref/f/filetime.html">http://www.vbapi.com/ref/f/filetime.html</link>
      or
-      <link href="http://www.cs.rpi.edu/courses/fall01/os/FILETIME.html">http://www.cs.rpi.edu/courses/fall01/os/FILETIME.html</link>. In
-      short:
-      <em>The FILETIME structure holds a date and time associated with a file.
-       The structure identifies a 64-bit integer specifying the number of
-       100-nanosecond intervals which have passed since January 1, 1601. This
-       64-bit value is split into the two dwords stored in the
-       structure.</em></p>
-	    </li>
+      <link href="http://www.cs.rpi.edu/courses/fall01/os/FILETIME.html">http://www.cs.rpi.edu/courses/fall01/os/FILETIME.html</link>.
+      In short: <em>The FILETIME structure holds a date and time associated
+      with a file. The structure identifies a 64-bit integer specifying the
+      number of 100-nanosecond intervals which have passed since January 1,
+      1601. This 64-bit value is split into the two dwords stored in the
+      structure.</em></p>
+    </li>

-     <li>
-      <p>This documentation origins from the <link href="http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html">HPSF description</link> available at <link href="http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html">http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html</link>.</p>
+    <li>
+     <p>Information about the code page property in the
+     DocumentSummaryInformation stream is available at <link
+      href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/stg/stg/property_id_1.asp">http://msdn.microsoft.com/library/default.asp?url=/library/en-us/stg/stg/property_id_1.asp</link>.</p>
+    </li>
+
+    <li>
+     <p>This documentation originates from the <link href="http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html">HPSF description</link> available at <link href="http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html">http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html</link>.</p>
     </li>
    </ol>
   </section>
--- a/src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java
+++ b/src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java
@ -59,257 +59,236 @@ import java.util.*;
 import org.apache.poi.hpsf.wellknown.*;

 /**
- *  <p>
+ * <p>Convenience class representing a DocumentSummary Information stream in a
+ * Microsoft Office document.</p>
 *
- *  Convenience class representing a DocumentSummary Information stream in a
- *  Microsoft Office document.</p>
- *
- *@author     Rainer Klute (klute@rainer-klute.de)
- *@author     Drew Varner (Drew.Varner closeTo sc.edu)
- *@created    May 10, 2002
- *@see        SummaryInformation
- *@version    $Id: DocumentSummaryInformation.java,v 1.6 2002/05/03 07:29:09
- *      klute Exp $
- *@since      2002-02-09
+ * @author Rainer Klute (klute@rainer-klute.de)
+ * @author Drew Varner (Drew.Varner closeTo sc.edu)
+ * @see SummaryInformation
+ * @version $Id$
+ * @since 2002-02-09
 */
-public class DocumentSummaryInformation extends SpecialPropertySet {
+public class DocumentSummaryInformation extends SpecialPropertySet
+{

    /**
-     *  <p>
+     * <p>Creates a {@link DocumentSummaryInformation} from a given
+     * {@link PropertySet}.</p>
     *
-     *  Creates a {@link DocumentSummaryInformation} from a given {@link
-     *  PropertySet}.</p>
-     *
-     *@param  ps                                      A property set which
-     *      should be created from a document summary information stream.
-     *@exception  UnexpectedPropertySetTypeException  Description of the
-     *      Exception
-     *@throws  UnexpectedPropertySetTypeException     if <var>ps</var> does not
-     *      contain a document summary information stream.
+     * @param ps A property set which should be created from a
+     * document summary information stream.
+     * @throws UnexpectedPropertySetTypeException if <var>ps</var>
+     * does not contain a document summary information stream.
     */
    public DocumentSummaryInformation(final PropertySet ps)
-             throws UnexpectedPropertySetTypeException {
+	throws UnexpectedPropertySetTypeException
+    {
        super(ps);
-        if (!isDocumentSummaryInformation()) {
+        if (!isDocumentSummaryInformation())
            throw new UnexpectedPropertySetTypeException
-                    ("Not a " + getClass().getName());
-        }
+		("Not a " + getClass().getName());
    }



    /**
-     *  <p>
+     * <p>Returns the stream's category (or <code>null</code>).</p>
     *
-     *  Returns the stream's category (or <code>null</code>).</p>
-     *
-     *@return    The category value
+     * @return The category value
     */
-    public String getCategory() {
+    public String getCategory()
+    {
        return (String) getProperty(PropertyIDMap.PID_CATEGORY);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's presentation format (or
+     * <code>null</code>).</p>
     *
-     *  Returns the stream's presentation format (or <code>null</code>).</p>
-     *
-     *@return    The presentationFormat value
+     * @return The presentationFormat value
     */
-    public String getPresentationFormat() {
+    public String getPresentationFormat()
+    {
        return (String) getProperty(PropertyIDMap.PID_PRESFORMAT);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's byte count or 0 if the {@link
+     * DocumentSummaryInformation} does not contain a byte count.</p>
     *
-     *  Returns the stream's byte count or 0 if the {@link
-     *  DocumentSummaryInformation} does not contain a byte count.</p>
-     *
-     *@return    The byteCount value
+     * @return The byteCount value
     */
-    public int getByteCount() {
+    public int getByteCount()
+    {
        return getPropertyIntValue(PropertyIDMap.PID_BYTECOUNT);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's line count or 0 if the {@link
+     * DocumentSummaryInformation} does not contain a line count.</p>
     *
-     *  Returns the stream's line count or 0 if the {@link
-     *  DocumentSummaryInformation} does not contain a line count.</p>
-     *
-     *@return    The lineCount value
+     * @return The lineCount value
     */
-    public int getLineCount() {
+    public int getLineCount()
+    {
        return getPropertyIntValue(PropertyIDMap.PID_LINECOUNT);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's par count or 0 if the {@link
+     * DocumentSummaryInformation} does not contain a par count.</p>
     *
-     *  Returns the stream's par count or 0 if the {@link
-     *  DocumentSummaryInformation} does not contain a par count.</p>
-     *
-     *@return    The parCount value
+     * @return The parCount value
     */
-    public int getParCount() {
+    public int getParCount()
+    {
        return getPropertyIntValue(PropertyIDMap.PID_PARCOUNT);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's slide count or 0 if the {@link
+     * DocumentSummaryInformation} does not contain a slide count.</p>
     *
-     *  Returns the stream's slide count or 0 if the {@link
-     *  DocumentSummaryInformation} does not contain a slide count.</p>
-     *
-     *@return    The slideCount value
+     * @return The slideCount value
     */
-    public int getSlideCount() {
+    public int getSlideCount()
+    {
        return getPropertyIntValue(PropertyIDMap.PID_SLIDECOUNT);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's note count or 0 if the {@link
+     * DocumentSummaryInformation} does not contain a note count.</p>
     *
-     *  Returns the stream's note count or 0 if the {@link
-     *  DocumentSummaryInformation} does not contain a note count.</p>
-     *
-     *@return    The noteCount value
+     * @return The noteCount value
     */
-    public int getNoteCount() {
+    public int getNoteCount()
+    {
        return getPropertyIntValue(PropertyIDMap.PID_NOTECOUNT);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's hidden count or 0 if the {@link
+     * DocumentSummaryInformation} does not contain a hidden
+     * count.</p>
     *
-     *  Returns the stream's hidden count or 0 if the {@link
-     *  DocumentSummaryInformation} does not contain a hidden count.</p>
-     *
-     *@return    The hiddenCount value
+     * @return The hiddenCount value
     */
-    public int getHiddenCount() {
+    public int getHiddenCount()
+    {
        return getPropertyIntValue(PropertyIDMap.PID_HIDDENCOUNT);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's mmclip count or 0 if the {@link
+     * DocumentSummaryInformation} does not contain a mmclip
+     * count.</p>
     *
-     *  Returns the stream's mmclip count or 0 if the {@link
-     *  DocumentSummaryInformation} does not contain a mmclip count.</p>
-     *
-     *@return    The mMClipCount value
+     * @return The mMClipCount value
     */
-    public int getMMClipCount() {
+    public int getMMClipCount()
+    {
        return getPropertyIntValue(PropertyIDMap.PID_MMCLIPCOUNT);
    }



    /**
-     *  <p>
+     * <p>Returns <code>true</code> when scaling of the thumbnail is
+     * desired, <code>false</code> if cropping is desired.</p>
     *
-     *  Returns <code>true</code> when scaling of the thumbnail is desired,
-     *  <code>false</code> if cropping is desired.</p>
-     *
-     *@return    The scale value
+     * @return The scale value
     */
-    public boolean getScale() {
+    public boolean getScale()
+    {
        return getPropertyBooleanValue(PropertyIDMap.PID_SCALE);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's heading pair (or <code>null</code>)
+     * <strong>when this method is implemented. Please note that the
+     * return type is likely to change!</strong></p>
     *
-     *  Returns the stream's heading pair (or <code>null</code>) <strong>when
-     *  this method is implemented. Please note that the return type is likely
-     *  to change!</strong>
-     *
-     *@return    The headingPair value
+     * @return The headingPair value
     */
-    public byte[] getHeadingPair() {
-        if (true) {
+    public byte[] getHeadingPair()
+    {
+        if (true)
            throw new UnsupportedOperationException("FIXME");
-        }
        return (byte[]) getProperty(PropertyIDMap.PID_HEADINGPAIR);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's doc parts (or <code>null</code>)
+     * <strong>when this method is implemented. Please note that the
+     * return type is likely to change!</strong></p>
     *
-     *  Returns the stream's doc parts (or <code>null</code>) <strong>when this
-     *  method is implemented. Please note that the return type is likely to
-     *  change!</strong>
-     *
-     *@return    The docparts value
+     * @return The docparts value
     */
-    public byte[] getDocparts() {
-        if (true) {
+    public byte[] getDocparts()
+    {
+        if (true)
            throw new UnsupportedOperationException("FIXME");
-        }
        return (byte[]) getProperty(PropertyIDMap.PID_DOCPARTS);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's manager (or <code>null</code>).</p>
     *
-     *  Returns the stream's manager (or <code>null</code>).</p>
-     *
-     *@return    The manager value
+     * @return The manager value
     */
-    public String getManager() {
+    public String getManager()
+    {
        return (String) getProperty(PropertyIDMap.PID_MANAGER);
    }



    /**
-     *  <p>
+     * <p>Returns the stream's company (or <code>null</code>).</p>
     *
-     *  Returns the stream's company (or <code>null</code>).</p>
-     *
-     *@return    The company value
+     * @return The company value
     */
-    public String getCompany() {
+    public String getCompany()
+    {
        return (String) getProperty(PropertyIDMap.PID_COMPANY);
    }



    /**
-     *  <p>
+     * <p>Returns <code>true</code> if the custom links are hampered
+     * by excessive noise, for all applications.</p> <p>
     *
-     *  Returns <code>true</code> if the custom links are hampered by excessive
-     *  noise, for all applications.</p> <p>
+     * <strong>FIXME:</strong> Explain this some more! I (Rainer)
+     * don't understand it.</p>
     *
-     *  <strong>FIXME:</strong> Explain this some more! I (Rainer) don't
-     *  understand it.</p>
-     *
-     *@return    The linksDirty value
+     * @return The linksDirty value
     */
-    public boolean getLinksDirty() {
+    public boolean getLinksDirty()
+    {
        return getPropertyBooleanValue(PropertyIDMap.PID_LINKSDIRTY);
    }

--- a/src/java/org/apache/poi/hpsf/Property.java
+++ b/src/java/org/apache/poi/hpsf/Property.java
@ -81,10 +81,9 @@ import org.apache.poi.util.LittleEndian;
 * value, {@link Variant#VT_FILETIME} some date and time (of a
 * file).</p>
 *
- * <p><strong>FIXME:</strong> Reading of other types than {@link
- * Variant#VT_I4}, {@link Variant#VT_FILETIME}, {@link
- * Variant#VT_LPSTR}, {@link Variant#VT_CF}, {@link Variant#VT_BOOL},
- * and reading the dictionary property is not yet implemented.</p>
+ * <p><strong>FIXME:</strong> Reading is not implemented for all
+ * {@link Variant} types yet. Feel free to submit error reports or
+ * patches for the types you need.</p>
 *
 * @author Rainer Klute (klute@rainer-klute.de)
 * @author Drew Varner (Drew.Varner InAndAround sc.edu)
@ -96,6 +95,9 @@ import org.apache.poi.util.LittleEndian;
 public class Property
 {

+    /* Codepage 1200 denotes Unicode. */
+    private static int CP_UNICODE = 1200;
+
    private int id;


@ -150,121 +152,37 @@ public class Property
     * @param offset The property's type/value pair's offset in the
     * section.
     * @param length The property's type/value pair's length in bytes.
+     * @param codepage The section's and thus the property's
+     * codepage. It is needed only when reading string values.
     */
    public Property(final int id, final byte[] src, final long offset,
-		    int length)
+		    int length, int codepage)
    {
        this.id = id;

        /*
-         *  ID 0 is a special case since it specifies a dictionary of
-         *  property IDs and property names.
+         * ID 0 is a special case since it specifies a dictionary of
+         * property IDs and property names.
         */
        if (id == 0)
 	{
-            value = readDictionary(src, offset, length);
+            value = readDictionary(src, offset, length, codepage);
            return;
        }

-        /*
-         *  FIXME: Support this!
-         */
-//        /* ID 1 is another special case: It denotes the code page of
-//         * byte strings in this section. */
-//        if (id == 1)
-//        {
-//            value = readCodepage(src, offset);
-//            return;
-//        }
-
        int o = (int) offset;
        type = LittleEndian.getUInt(src, o);
        o += LittleEndian.INT_SIZE;

-        /*
-         *  FIXME: Support reading more types!
-         */
-        switch ((int)type) {
-            case Variant.VT_I4:
-            {
-                /*
-                 *  Read a word. In Java it is represented as an
-                 *  Integer object.
-                 */
-                value = new Long(LittleEndian.getUInt(src, o));
-                break;
-            }
-            case Variant.VT_FILETIME:
-            {
-                /*
-                 *  Read a FILETIME object. In Java it is represented
-                 *  as a Date.
-                 */
-                final long low = LittleEndian.getUInt(src, o);
-                o += LittleEndian.INT_SIZE;
-                final long high = LittleEndian.getUInt(src, o);
-                value = Util.filetimeToDate((int)high, (int)low);
-                break;
-            }
-            case Variant.VT_LPSTR:
-            {
-                /*
-                 *  Read a byte string. In Java it is represented as a
-                 *  String. The null bytes at the end of the byte
-                 *  strings must be stripped.
-                 */
-                final int first = o + LittleEndian.INT_SIZE;
-                long last = first + LittleEndian.getUInt(src, o) - 1;
-                o += LittleEndian.INT_SIZE;
-                while (src[(int)last] == 0 && first <= last) {
-                    last--;
-                }
-                value = new String(src, (int)first, (int)(last - first + 1));
-                break;
-            }
-            case Variant.VT_CF:
-            {
-                /*
-                 *  The first four bytes in src, from rc[offset] to
-                 *  src[offset + 3] contain the DWord for VT_CF, so
-                 *  skip it, we don't need it.
-                 */
-                /*
-                 *  Truncate the length of the return array by a DWord
-                 *  length (4 bytes).
-                 */
-                length = length - LittleEndian.INT_SIZE;
-
-                final byte[] v = new byte[length];
-                for (int i = 0; i < length; i++)
-                    v[i] = src[(int)(o + i)];
-		value = v;
-                break;
-            }
-            case Variant.VT_BOOL:
-            {
-                /*
-                 *  The first four bytes in src, from src[offset] to
-                 *  src[offset + 3] contain the DWord for VT_BOOL, so
-                 *  skip it, we don't need it.
-                 */
-                final int first = o + LittleEndian.INT_SIZE;
-                long bool = LittleEndian.getUInt(src, o);
-                if (bool != 0)
-                    value = new Boolean(true);
-                else
-                    value = new Boolean(false);
-		break;
-            }
-            default:
-            {
-                final byte[] v = new byte[length];
-                for (int i = 0; i < length; i++)
-                    v[i] = src[(int)(offset + i)];
-		value = v;
-                break;
-            }
-        }
+	try
+	{
+	    value = TypeReader.read(src, o, length, (int) type);
+	}
+	catch (Throwable t)
+	{
+	    t.printStackTrace();
+	    value = "*** null ***";
+	}
    }


@ -277,64 +195,67 @@ public class Property
     * @param offset At this offset within <var>src</var> the
     * dictionary starts.
     * @param length The dictionary contains at most this many bytes.
+     * @param codepage The codepage of the string values.
     * @return The dictonary
     */
    protected Map readDictionary(final byte[] src, final long offset,
-				 final int length)
+				 final int length, final int codepage)
    {
-        /*
-         *  FIXME: Check the length!
-         */
-        int o = (int)offset;
+	/* Check whether "offset" points into the "src" array. */
+	if (offset < 0 || offset > src.length)
+	    throw new HPSFRuntimeException
+		("Illegal offset " + offset + " while HPSF stream contains " +
+		 length + " bytes.");
+        int o = (int) offset;

        /*
-         *  Read the number of dictionary entries.
+         * Read the number of dictionary entries.
         */
        final long nrEntries = LittleEndian.getUInt(src, o);
        o += LittleEndian.INT_SIZE;

-        final Map m = new HashMap((int)nrEntries, (float) 1.0);
+        final Map m = new HashMap((int) nrEntries, (float) 1.0);
        for (int i = 0; i < nrEntries; i++)
 	{
-            /*
-             *  The key
-             */
+            /* The key. */
            final Long id = new Long(LittleEndian.getUInt(src, o));
            o += LittleEndian.INT_SIZE;

-            /*
-             *  The value (a string)
-             */
-            final long sLength = LittleEndian.getUInt(src, o);
+            /* The value (a string). The length is either the
+             * number of characters if the character set is Unicode or
+             * else the number of bytes. The length includes
+             * terminating 0x00 bytes which we have to strip off to
+             * create a Java string. */
+            long sLength = LittleEndian.getUInt(src, o);
            o += LittleEndian.INT_SIZE;

-            /*
-             *  Strip trailing 0x00 bytes.
-             */
-            long l = sLength;
-            while (src[(int)(o + l - 1)] == 0x00)
-                l--;
-            final String s = new String(src, o, (int)l);
-            o += sLength;
-            m.put(id, s);
+            /* Read the bytes or characters depending on whether the
+             * character set is Unicode or not. */
+	    StringBuffer b = new StringBuffer((int) sLength);
+	    for (int j = 0; j < sLength; j++)
+		if (codepage == CP_UNICODE)
+		{
+		    final int i1 = o + (j * 2);
+		    final int i2 = i1 + 1;
+		    b.append((char) ((src[i2] << 8) + src[i1]));
+		}
+		else
+		    b.append((char) src[o + j]);
+
+	    /* Strip 0x00 characters from the end of the string: */
+	    while (b.charAt(b.length() - 1) == 0x00)
+		b.setLength(b.length() - 1);
+	    if (codepage == CP_UNICODE)
+	    {
+		if (sLength % 2 == 1)
+		    sLength++;
+		o += (sLength + sLength);
+	    }
+	    else
+		o += sLength;
+            m.put(id, b.toString());
        }
        return m;
    }

-
-
-    /**
-     * <p>Reads a code page.</p>
-     *
-     * @param src The byte array containing the bytes making out the
-     * code page.
-     * @param offset At this offset within <var>src</var> the code
-     * page starts.
-     * @return The code page.
-     */
-    protected int readCodePage(final byte[] src, final long offset)
-    {
-        throw new UnsupportedOperationException("FIXME");
-    }
-
 }
--- a/src/java/org/apache/poi/hpsf/Section.java
+++ b/src/java/org/apache/poi/hpsf/Section.java
@ -161,66 +161,117 @@ public class Section
    public Section(final byte[] src, int offset)
    {
        /*
-         *  Read the format ID.
+         * Read the format ID.
         */
        formatID = new ClassID(src, offset);
        offset += ClassID.LENGTH;

        /*
-         *  Read the offset from the stream's start and positions to
-         *  the section header.
+         * Read the offset from the stream's start and positions to
+         * the section header.
         */
        this.offset = LittleEndian.getUInt(src, offset);
        offset = (int)this.offset;

        /*
-         *  Read the section length.
+         * Read the section length.
         */
        size = (int)LittleEndian.getUInt(src, offset);
        offset += LittleEndian.INT_SIZE;

        /*
-         *  Read the number of properties.
+         * Read the number of properties.
         */
        propertyCount = (int)LittleEndian.getUInt(src, offset);
        offset += LittleEndian.INT_SIZE;

        /*
-         *  Read the properties. The offset is positioned at the first
-         *  entry of the property list.
+         * Read the properties. The offset is positioned at the first
+         * entry of the property list. The problem is that we have to
+         * read the property with ID 1 before we read other
+         * properties, at least before other properties containing
+         * strings. The reason is that property 1 specifies the
+         * codepage. If it is 1200, all strings are in Unicode. In
+         * other words: Before we can read any strings we have to know
+         * whether they are in Unicode or not. Unfortunately property
+         * 1 is not guaranteed to be the first in a section.
+	 *
+	 * The algorithm below reads the properties in two passes: The
+	 * first one looks for property ID 1 and extracts the codepage
+	 * number. The second pass reads all properties, including property 1.
         */
        properties = new Property[propertyCount];
-        for (int i = 0; i < properties.length; i++) {
-            final int id = (int)LittleEndian.getUInt(src, offset);
-            offset += LittleEndian.INT_SIZE;
+	Property propertyOne;

-            /*
-             *  Offset from the section.
-             */
-            final int sOffset = (int)LittleEndian.getUInt(src, offset);
-            offset += LittleEndian.INT_SIZE;
+ 	/* Pass 1: Look for the codepage. */
+ 	int codepage = -1;
+	int pass1Offset = offset;
+        for (int i = 0; i < properties.length; i++)
+	{
+	    /* Read the property ID. */
+            final int id = (int) LittleEndian.getUInt(src, pass1Offset);
+            pass1Offset += LittleEndian.INT_SIZE;

-            /*
-             *  Calculate the length of the property.
-             */
+            /* Offset from the section's start. */
+            final int sOffset = (int) LittleEndian.getUInt(src, pass1Offset);
+            pass1Offset += LittleEndian.INT_SIZE;
+
+            /* Calculate the length of the property. */
            int length;
-            if (i == properties.length - 1) {
-                length = (int)(src.length - this.offset - sOffset);
-            } else {
+            if (i == properties.length - 1)
+                length = (int) (src.length - this.offset - sOffset);
+            else
+                length = (int)
+                    LittleEndian.getUInt(src, pass1Offset +
+					 LittleEndian.INT_SIZE) - sOffset;
+
+	    if (id == PropertyIDMap.PID_CODEPAGE)
+	    {
+		/* Read the codepage if the property ID is 1. */
+
+		/* Read the property's value type. It must be
+		 * VT_I2. */
+		int o = (int) (this.offset + sOffset);
+		final long type = LittleEndian.getUInt(src, o);
+		o += LittleEndian.INT_SIZE;
+
+		if (type != Variant.VT_I2)
+		    throw new HPSFRuntimeException
+			("Value type of property ID 1 is not VT_I2 but " +
+			 type + ".");
+
+                /* Read the codepage number. */
+                codepage = LittleEndian.getUShort(src, o);
+	    }
+	}
+
+	/* Pass 2: Read all properties, including 1. */
+        for (int i = 0; i < properties.length; i++)
+	{
+	    /* Read the property ID. */
+            final int id = (int) LittleEndian.getUInt(src, offset);
+            offset += LittleEndian.INT_SIZE;
+
+            /* Offset from the section. */
+            final int sOffset = (int) LittleEndian.getUInt(src, offset);
+            offset += LittleEndian.INT_SIZE;
+
+            /* Calculate the length of the property. */
+            int length;
+            if (i == properties.length - 1)
+                length = (int) (src.length - this.offset - sOffset);
+            else
                length = (int)
                    LittleEndian.getUInt(src, offset + LittleEndian.INT_SIZE) -
                    sOffset;
-            }

-            /*
-             *  Create it.
-             */
-            properties[i] =
-                    new Property(id, src, this.offset + sOffset, length);
+            /* Create it. */
+            properties[i] = new Property(id, src, this.offset + sOffset,
+					 length, codepage);
        }

        /*
-         *  Extract the dictionary (if available).
+         * Extract the dictionary (if available).
         */
        dictionary = (Map) getProperty(0);
    }
@ -237,7 +288,7 @@ public class Section
     *
     * @return The property's value
     */
-    protected Object getProperty(final int id)
+    public Object getProperty(final int id)
    {
        wasNull = false;
        for (int i = 0; i < properties.length; i++)
--- a/src/java/org/apache/poi/hpsf/TypeReader.java
+++ b/src/java/org/apache/poi/hpsf/TypeReader.java
@ -0,0 +1,208 @@
+/*
+ *  ====================================================================
+ *  The Apache Software License, Version 1.1
+ *
+ *  Copyright (c) 2000 The Apache Software Foundation.  All rights
+ *  reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *  notice, this list of conditions and the following disclaimer.
+ *
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *  notice, this list of conditions and the following disclaimer in
+ *  the documentation and/or other materials provided with the
+ *  distribution.
+ *
+ *  3. The end-user documentation included with the redistribution,
+ *  if any, must include the following acknowledgment:
+ *  "This product includes software developed by the
+ *  Apache Software Foundation (http://www.apache.org/)."
+ *  Alternately, this acknowledgment may appear in the software itself,
+ *  if and wherever such third-party acknowledgments normally appear.
+ *
+ *  4. The names "Apache" and "Apache Software Foundation" must
+ *  not be used to endorse or promote products derived from this
+ *  software without prior written permission. For written
+ *  permission, please contact apache@apache.org.
+ *
+ *  5. Products derived from this software may not be called "Apache",
+ *  nor may "Apache" appear in their name, without prior written
+ *  permission of the Apache Software Foundation.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ *  ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ *  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ *  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ *  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ *  SUCH DAMAGE.
+ *  ====================================================================
+ *
+ *  This software consists of voluntary contributions made by many
+ *  individuals on behalf of the Apache Software Foundation.  For more
+ *  information on the Apache Software Foundation, please see
+ *  <http://www.apache.org/>.
+ *
+ *  Portions of this software are based upon public domain software
+ *  originally written at the National Center for Supercomputing Applications,
+ *  University of Illinois, Urbana-Champaign.
+ *
+ *  Portions of this software are based upon public domain software
+ *  originally written at the National Center for Supercomputing Applications,
+ *  University of Illinois, Urbana-Champaign.
+ */
+package org.apache.poi.hpsf;
+
+import java.util.*;
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * <p>Reader for specific data types.</p>
+ *
+ * @author Rainer Klute (klute@rainer-klute.de)
+ * @see Property
+ * @see Variant
+ * @version $Id$
+ * @since 2002-12-09
+ */
+public class TypeReader
+{
+
+    /**
+     * <p>Reads a variant data type from a byte array and converts it
+     * to a Java object.</p>
+     *
+     * <p>The following conversions are implemented:</p>
+     *
+     * <ul>
+     * <li><code>VT_I2</code>: read with
+     * <code>LittleEndian.getUShort</code>, returned as an
+     * {@link Integer}</li>
+     * <li><code>VT_I4</code>: read with
+     * <code>LittleEndian.getUInt</code>, returned as a
+     * {@link Long}</li>
+     * <li><code>VT_FILETIME</code>: two 32-bit words (low word
+     * first), converted by <code>Util.filetimeToDate</code></li>
+     * <li><code>VT_LPSTR</code>: length-prefixed byte string with
+     * trailing 0x00 bytes stripped, returned as a {@link String}</li>
+     * <li><code>VT_LPWSTR</code>: length-prefixed string of 16-bit
+     * little-endian characters with trailing 0x0000 characters
+     * stripped, returned as a {@link String}</li>
+     * <li><code>VT_BOOL</code>: 32-bit word, returned as a
+     * {@link Boolean} which is true if the word is not zero</li>
+     * <li><code>VT_CF</code> and all other types: the raw bytes,
+     * returned as a <code>byte[]</code></li>
+     * </ul>
+     *
+     * @param src The byte array
+     * @param offset The offset in the byte array at which this
+     * method starts reading the value's data
+     * @param length The length of the variant including the variant
+     * type field. The size of the type field is subtracted before
+     * raw bytes are copied; the parameter is used for
+     * <code>byte[]</code> results only.
+     * @param type The variant type, compared against the
+     * <code>VT_*</code> constants of {@link Variant}
+     * @return A Java object that corresponds best to the variant
+     * field. For example, a VT_I4 is returned as a {@link Long}, a
+     * VT_LPSTR as a {@link String}.
+     *
+     * @see Variant
+     */
+    public static Object read(final byte[] src, int offset, int length,
+                              final int type)
+    {
+        /*
+         * FIXME: Support reading more types and clean up this code!
+         */
+        Object value;
+
+        /* The payload length excludes the variant type field. */
+        length = length - LittleEndian.INT_SIZE;
+
+        switch (type)
+        {
+            case Variant.VT_I2:
+            {
+                /*
+                 * Read a short. In Java it is represented as an
+                 * Integer object.
+                 */
+                value = new Integer(LittleEndian.getUShort(src, offset));
+                break;
+            }
+            case Variant.VT_I4:
+            {
+                /*
+                 * Read a word. In Java it is represented as a
+                 * Long object.
+                 */
+                value = new Long(LittleEndian.getUInt(src, offset));
+                break;
+            }
+            case Variant.VT_FILETIME:
+            {
+                /*
+                 * Read a FILETIME object: the low word comes first,
+                 * followed by the high word.
+                 */
+                final long low = LittleEndian.getUInt(src, offset);
+                offset += LittleEndian.INT_SIZE;
+                final long high = LittleEndian.getUInt(src, offset);
+                value = Util.filetimeToDate((int) high, (int) low);
+                break;
+            }
+            case Variant.VT_LPSTR:
+            {
+                /*
+                 * Read a byte string. In Java it is represented as a
+                 * String object. The 0x00 bytes at the end must be
+                 * stripped.
+                 *
+                 * NOTE(review): The bytes are decoded with the
+                 * platform's default charset; this method is not
+                 * given a code page.
+                 */
+                final int first = offset + LittleEndian.INT_SIZE;
+                long last = first + LittleEndian.getUInt(src, offset) - 1;
+
+                /* Check the bound before touching the array. */
+                while (first <= last && src[(int) last] == 0)
+                    last--;
+                value = new String(src, first, (int) (last - first + 1));
+                break;
+            }
+            case Variant.VT_LPWSTR:
+            {
+                /*
+                 * Read a Unicode string. In Java it is represented as
+                 * a String object. The length field counts 16-bit
+                 * characters, each stored low byte first. The 0x0000
+                 * characters at the end must be stripped.
+                 */
+                final long count = LittleEndian.getUInt(src, offset);
+                offset += LittleEndian.INT_SIZE;
+
+                /*
+                 * Never preallocate more than the array can hold, so
+                 * a bogus length field cannot trigger a huge
+                 * allocation.
+                 */
+                final long available = (long) (src.length - offset) / 2;
+                final int capacity =
+                    (int) Math.max(0L, Math.min(count, available));
+                final StringBuffer b = new StringBuffer(capacity);
+                for (int i = 0; i < count; i++)
+                {
+                    final int lo = offset + (i * 2);
+                    final int hi = lo + 1;
+                    /*
+                     * Java bytes are signed: mask both of them,
+                     * otherwise values of 0x80 and above are
+                     * sign-extended and corrupt the character.
+                     */
+                    b.append((char) (((src[hi] & 0xFF) << 8) |
+                                     (src[lo] & 0xFF)));
+                }
+
+                /*
+                 * Strip 0x00 characters from the end of the string.
+                 * The buffer may become (or already be) empty.
+                 */
+                while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)
+                    b.setLength(b.length() - 1);
+                value = b.toString();
+                break;
+            }
+            case Variant.VT_BOOL:
+            {
+                /*
+                 * Read a DWord at the current offset. Any value
+                 * other than zero means "true".
+                 */
+                final long bool = LittleEndian.getUInt(src, offset);
+                value = (bool != 0) ? Boolean.TRUE : Boolean.FALSE;
+                break;
+            }
+            case Variant.VT_CF:
+            default:
+            {
+                /*
+                 * No dedicated conversion: hand out a copy of the
+                 * raw bytes.
+                 */
+                final byte[] v = new byte[length];
+                System.arraycopy(src, offset, v, 0, length);
+                value = v;
+                break;
+            }
+        }
+        return value;
+    }
+
+}
--- a/src/java/org/apache/poi/hpsf/wellknown/PropertyIDMap.java
+++ b/src/java/org/apache/poi/hpsf/wellknown/PropertyIDMap.java
@ -100,25 +100,110 @@ public class PropertyIDMap extends HashMap
    public final static int PID_APPNAME = 18;
    public final static int PID_SECURITY = 19;

+
+
    /*
     * The following definitions are for the Document Summary Information.
     */
+
+    /** 
+     * <p>The entry is a dictionary.</p>
+     */
+    public final static int PID_DICTIONARY = 0;
+
+    /**
+     * <p>The entry denotes a code page.</p>
+     */
+    public final static int PID_CODEPAGE = 1;
+
+    /** 
+     * <p>The entry is a string denoting the category the file belongs
+     * to, e.g. review, memo, etc. This is useful to find documents of
+     * same type.</p>
+     */
    public final static int PID_CATEGORY = 2;
+
+    /** 
+     * <p>Target format for power point presentation, e.g. 35mm,
+     * printer, video etc.</p>
+     */
    public final static int PID_PRESFORMAT = 3;
+
+    /** 
+     * <p>Number of bytes.</p>
+     */
    public final static int PID_BYTECOUNT = 4;
+
+    /** 
+     * <p>Number of lines.</p>
+     */
    public final static int PID_LINECOUNT = 5;
+
+    /** 
+     * <p>Number of paragraphs.</p>
+     */
    public final static int PID_PARCOUNT = 6;
+
+    /** 
+     * <p>Number of slides in a power point presentation.</p>
+     */
    public final static int PID_SLIDECOUNT = 7;
+
+    /** 
+     * <p>Number of slides with notes.</p>
+     */
    public final static int PID_NOTECOUNT = 8;
+
+    /** 
+     * <p>Number of hidden slides.</p>
+     */
    public final static int PID_HIDDENCOUNT = 9;
+
+    /** 
+     * <p>Number of multimedia clips, e.g. sound or video.</p>
+     */
    public final static int PID_MMCLIPCOUNT = 10;
+
+    /** 
+     * <p>This entry is set to -1 when scaling of the thumbnail is
+     * desired. Otherwise the thumbnail should be cropped.</p>
+     */
    public final static int PID_SCALE = 11;
+
+    /** 
+     * <p>This entry denotes an internally used property. It is a
+     * vector of variants consisting of pairs of a string (VT_LPSTR)
+     * and a number (VT_I4). The string is a heading name, and the
+     * number tells how many document parts are under that
+     * heading.</p>
+     */
    public final static int PID_HEADINGPAIR = 12;
+
+    /** 
+     * <p>This entry contains the names of document parts (word: names
+     * of the documents in the master document, excel: sheet names,
+     * power point: slide titles, binder: document names).</p>
+     */
    public final static int PID_DOCPARTS = 13;
+
+    /** 
+     * <p>This entry contains the name of the project manager.</p>
+     */
    public final static int PID_MANAGER = 14;
+
+    /** 
+     * <p>This entry contains the company name.</p>
+     */
    public final static int PID_COMPANY = 15;
+
+    /** 
+     * <p>If this entry is -1 the links are dirty and should be
+     * re-evaluated.</p>
+     */
    public final static int PID_LINKSDIRTY = 16;

+
+
    /**
     * <p>Contains the summary information property ID values and
     * associated strings. See the overall HPSF documentation for
@ -184,7 +269,7 @@ public class PropertyIDMap extends HashMap
    {
        if (summaryInformationProperties == null)
 	{
-            PropertyIDMap m = new PropertyIDMap(17, (float) 1.0);
+            PropertyIDMap m = new PropertyIDMap(18, (float) 1.0);
            m.put(PID_TITLE, "PID_TITLE");
            m.put(PID_SUBJECT, "PID_SUBJECT");
            m.put(PID_AUTHOR, "PID_AUTHOR");
@ -221,6 +306,8 @@ public class PropertyIDMap extends HashMap
        if (documentSummaryInformationProperties == null)
 	{
            PropertyIDMap m = new PropertyIDMap(17, (float) 1.0);
+            m.put(PID_DICTIONARY, "PID_DICTIONARY");
+            m.put(PID_CODEPAGE, "PID_CODEPAGE");
            m.put(PID_CATEGORY, "PID_CATEGORY");
            m.put(PID_PRESFORMAT, "PID_PRESFORMAT");
            m.put(PID_BYTECOUNT, "PID_BYTECOUNT");
--- a/src/java/org/apache/poi/hpsf/wellknown/SectionIDMap.java
+++ b/src/java/org/apache/poi/hpsf/wellknown/SectionIDMap.java
@ -57,54 +57,51 @@ package org.apache.poi.hpsf.wellknown;
 import java.util.*;

 /**
- *  <p>
+ * <p>Maps section format IDs to {@link PropertyIDMap}s. It is
+ * initialized with two well-known section format IDs: those of the
+ * <tt>\005SummaryInformation</tt> stream and the
+ * <tt>\005DocumentSummaryInformation</tt> stream.</p>
 *
- *  Maps section format IDs to {@link PropertyIDMap}s. It is initialized with
- *  two well-known section format IDs: those of the <tt>\005SummaryInformation
- *  </tt> stream and the <tt>\005DocumentSummaryInformation stream.</p> <p>
+ * <p>If you have a section format ID you can use it as a key to query
+ * this map.  If you get a {@link PropertyIDMap} returned your section
+ * is well-known and you can query the {@link PropertyIDMap} for PID
+ * strings. If you get back <code>null</code> you are on your own.</p>
 *
- *  If you have a section format ID you can use it as a key to query this map.
- *  If you get a {@link PropertyIDMap} returned your section is well-known and
- *  you can query the {@link PropertyIDMap} for PID strings. If you get back
- *  <code>null</code> you are on your own.</p> <p>
+ * <p>This {@link Map} expects the byte arrays of section format IDs
+ * as keys. A key maps to a {@link PropertyIDMap} describing the
+ * property IDs in sections with the specified section format ID.</p>
 *
- *  This {@link Map} expects the byte arrays of section format IDs as keys. A
- *  key maps to a {@link PropertyIDMap} describing the property IDs in sections
- *  with the specified section format ID.</p>
- *
- *@author     Rainer Klute (klute@rainer-klute.de)
- *@created    May 10, 2002
- *@version    $Id$
- *@since      2002-02-09
+ * @author Rainer Klute (klute@rainer-klute.de)
+ * @version $Id$
+ * @since 2002-02-09
 */
-public class SectionIDMap extends HashMap {
+public class SectionIDMap extends HashMap
+{

    /**
-     *  <p>
-     *
-     *  The SummaryInformation's section's format ID.</p>
+     * <p>The SummaryInformation's section's format ID.</p>
     */
-    public final static byte[] SUMMARY_INFORMATION_ID =
-            new byte[]{(byte) 0xF2, (byte) 0x9F, (byte) 0x85, (byte) 0xE0,
-            (byte) 0x4F, (byte) 0xF9, (byte) 0x10, (byte) 0x68,
-            (byte) 0xAB, (byte) 0x91, (byte) 0x08, (byte) 0x00,
-            (byte) 0x2B, (byte) 0x27, (byte) 0xB3, (byte) 0xD9};
+    public final static byte[] SUMMARY_INFORMATION_ID = new byte[]
+    {
+	(byte) 0xF2, (byte) 0x9F, (byte) 0x85, (byte) 0xE0,
+	(byte) 0x4F, (byte) 0xF9, (byte) 0x10, (byte) 0x68,
+	(byte) 0xAB, (byte) 0x91, (byte) 0x08, (byte) 0x00,
+	(byte) 0x2B, (byte) 0x27, (byte) 0xB3, (byte) 0xD9
+    };

    /**
-     *  <p>
-     *
-     *  The DocumentSummaryInformation's first section's format ID. The second
-     *  section has a different format ID which is not well-known.</p>
+     * <p>The DocumentSummaryInformation's first section's format
+     * ID. The second section has a different format ID which is not
+     * well-known.</p>
     */
-    public final static byte[] DOCUMENT_SUMMARY_INFORMATION_ID =
-            new byte[]{(byte) 0xD5, (byte) 0xCD, (byte) 0xD5, (byte) 0x02,
-            (byte) 0x2E, (byte) 0x9C, (byte) 0x10, (byte) 0x1B,
-            (byte) 0x93, (byte) 0x97, (byte) 0x08, (byte) 0x00,
-            (byte) 0x2B, (byte) 0x2C, (byte) 0xF9, (byte) 0xAE};
+    public final static byte[] DOCUMENT_SUMMARY_INFORMATION_ID = new byte[]
+    {
+	(byte) 0xD5, (byte) 0xCD, (byte) 0xD5, (byte) 0x02,
+	(byte) 0x2E, (byte) 0x9C, (byte) 0x10, (byte) 0x1B,
+	(byte) 0x93, (byte) 0x97, (byte) 0x08, (byte) 0x00,
+	(byte) 0x2B, (byte) 0x2C, (byte) 0xF9, (byte) 0xAE
+    };

-    /**
-     *  Description of the Field
-     */
    public final static String UNDEFINED = "[undefined]";

    private static SectionIDMap defaultMap;
@ -112,19 +109,20 @@ public class SectionIDMap extends HashMap {


    /**
-     *  <p>
+     * <p>Returns the singleton instance of the default {@link
+     * SectionIDMap}.</p>
     *
-     *  Returns the singleton instance of the default {@link SectionIDMap}.</p>
-     *
-     *@return    The instance value
+     * @return The instance value
     */
-    public static SectionIDMap getInstance() {
-        if (defaultMap == null) {
+    public static SectionIDMap getInstance()
+    {
+        if (defaultMap == null)
+	{
            final SectionIDMap m = new SectionIDMap();
            m.put(SUMMARY_INFORMATION_ID,
-                    PropertyIDMap.getSummaryInformationProperties());
+		  PropertyIDMap.getSummaryInformationProperties());
            m.put(DOCUMENT_SUMMARY_INFORMATION_ID,
-                    PropertyIDMap.getDocumentSummaryInformationProperties());
+		  PropertyIDMap.getDocumentSummaryInformationProperties());
            defaultMap = m;
        }
        return defaultMap;
@ -133,31 +131,30 @@ public class SectionIDMap extends HashMap {


    /**
-     *  <p>
+     * <p>Returns the property ID string that is associated with a
+     * given property ID in a section format ID's namespace.</p>
     *
-     *  Returns the property ID string that is associated with a given property
-     *  ID in a section format ID's namespace.</p>
-     *
-     *@param  sectionFormatID  Each section format ID has its own name space of
-     *      property ID strings and thus must be specified.
-     *@param  pid              The property ID
-     *@return                  The well-known property ID string associated with
-     *      the property ID <var>pid</var> in the name space spanned by <var>
-     *      sectionFormatID</var> . If the <var>pid</var> /<var>sectionFormatID
-     *      </var> combination is not well-known, the string "[undefined]" is
-     *      returned.
+     * @param sectionFormatID Each section format ID has its own name
+     * space of property ID strings and thus must be specified.
+     * @param  pid The property ID
+     * @return The well-known property ID string associated with the
+     * property ID <var>pid</var> in the name space spanned by
+     * <var>sectionFormatID</var>. If the
+     * <var>pid</var>/<var>sectionFormatID</var> combination is not
+     * well-known, the string "[undefined]" is returned.
     */
    public static String getPIDString(final byte[] sectionFormatID,
-            final int pid) {
+				      final int pid)
+    {
        final PropertyIDMap m =
-                (PropertyIDMap) getInstance().get(sectionFormatID);
-        if (m == null) {
+	    (PropertyIDMap) getInstance().get(sectionFormatID);
+        if (m == null)
            return UNDEFINED;
-        } else {
+        else
+	{
            final String s = (String) m.get(pid);
-            if (s == null) {
+            if (s == null)
                return UNDEFINED;
-            }
            return s;
        }
    }
@ -165,57 +162,47 @@ public class SectionIDMap extends HashMap {


    /**
-     *  <p>
-     *
-     *  Returns the {@link PropertyIDMap} for a given section format ID.</p>
-     *
-     *@param  sectionFormatID  Description of the Parameter
-     *@return                  Description of the Return Value
+     * <p>Returns the {@link PropertyIDMap} for a given section format
+     * ID.</p>
     */
-    public PropertyIDMap get(final byte[] sectionFormatID) {
+    public PropertyIDMap get(final byte[] sectionFormatID)
+    {
        return (PropertyIDMap) super.get(new String(sectionFormatID));
    }



    /**
-     *  <p>
+     * <p>Returns the {@link PropertyIDMap} for a given section format
+     * ID.</p>
     *
-     *  Returns the {@link PropertyIDMap} for a given section format ID.</p>
-     *
-     *@param  sectionFormatID  A section format ID as a <tt>byte[]</tt> .
-     *@return                  Description of the Return Value
-     *@deprecated              Use {@link #get(byte[])} instead!
+     * @param sectionFormatID A section format ID as a <tt>byte[]</tt> .
+     * @deprecated Use {@link #get(byte[])} instead!
     */
-    public Object get(final Object sectionFormatID) {
+    public Object get(final Object sectionFormatID)
+    {
        return get((byte[]) sectionFormatID);
    }



    /**
-     *  <p>
-     *
-     *  Associates a section format ID with a {@link PropertyIDMap}.</p>
-     *
-     *@param  sectionFormatID  Description of the Parameter
-     *@param  propertyIDMap    Description of the Parameter
-     *@return                  Description of the Return Value
+     * <p>Associates a section format ID with a {@link
+     * PropertyIDMap}.</p>
     */
    public Object put(final byte[] sectionFormatID,
-            final PropertyIDMap propertyIDMap) {
+		      final PropertyIDMap propertyIDMap)
+    {
        return super.put(new String(sectionFormatID), propertyIDMap);
    }



    /**
-     *@param  key    Description of the Parameter
-     *@param  value  Description of the Parameter
-     *@return        Description of the Return Value
-     *@deprecated    Use {@link #put(byte[], PropertyIDMap)} instead!
+     * @deprecated Use {@link #put(byte[], PropertyIDMap)} instead!
     */
-    public Object put(final Object key, final Object value) {
+    public Object put(final Object key, final Object value)
+    {
        return put((byte[]) key, (PropertyIDMap) value);
    }

--- a/src/testcases/org/apache/poi/hpsf/basic/TestBasic.java
+++ b/src/testcases/org/apache/poi/hpsf/basic/TestBasic.java
@ -183,7 +183,7 @@ public class TestBasic extends TestCase

    /**
     * <p>Tests the {@link PropertySet} methods. The test file has two
-     * property set: the first one is a {@link SummaryInformation},
+     * property sets: the first one is a {@link SummaryInformation},
     * the second one is a {@link DocumentSummaryInformation}.</p>
     */
    public void testPropertySetMethods() throws IOException, HPSFException
@ -214,11 +214,11 @@ public class TestBasic extends TestCase
    /**
     * <p>Runs the test cases stand-alone.</p>
     */
-    public static void main(String[] args)
+    public static void main(String[] args) throws Throwable
    {
 	System.setProperty("HPSF.testdata.path",
 			   "./src/testcases/org/apache/poi/hpsf/data");
-        junit.textui.TestRunner.run(TestBasic.class);
+	junit.textui.TestRunner.run(TestBasic.class);
    }

 }
--- a/src/testcases/org/apache/poi/hpsf/basic/TestUnicode.java
+++ b/src/testcases/org/apache/poi/hpsf/basic/TestUnicode.java
@ -0,0 +1,142 @@
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2002 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache POI" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache POI", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+package org.apache.poi.hpsf.basic;
+
+import java.io.*;
+import java.util.*;
+import junit.framework.*;
+import org.apache.poi.hpsf.*;
+
+
+
+/**
+ * <p>Tests whether Unicode string can be read from a
+ * DocumentSummaryInformation.</p>
+ *
+ * @author Rainer Klute (klute@rainer-klute.de)
+ * @since 2002-12-09
+ * @version $Id$
+ */
+public class TestUnicode extends TestCase
+{
+
+    /** Name of the POI filesystem used as test data. */
+    final static String POI_FS = "TestUnicode.xls";
+
+    /** Names of the streams to be read from the POI filesystem. */
+    final static String[] POI_FILES = new String[]
+	{
+	    "\005DocumentSummaryInformation",
+	};
+
+    /** The test data file; set by {@link #setUp}. */
+    File data;
+    POIFile[] poiFiles;
+
+
+
+    public TestUnicode(String name)
+    {
+        super(name);
+    }
+
+
+
+    /**
+     * <p>Locates the test file in the directory named by the system
+     * property "HPSF.testdata.path".</p>
+     */
+    public void setUp() throws FileNotFoundException, IOException
+    {
+	final File dataDir =
+	    new File(System.getProperty("HPSF.testdata.path"));
+	data = new File(dataDir, POI_FS);
+    }
+
+
+
+    /**
+     * <p>Reads the <tt>\005DocumentSummaryInformation</tt> stream of
+     * the test file and checks that it consists of two sections and
+     * that properties 1 to 5 of the second section, among them
+     * strings, have the expected values. Property 1 is expected to
+     * be 1200, which is the Windows code page identifier for
+     * UTF-16LE.</p>
+     */
+    public void testPropertySetMethods() throws IOException, HPSFException
+    {
+	POIFile poiFile = Util.readPOIFiles(data, POI_FILES)[0];
+	byte[] b = poiFile.getBytes();
+	PropertySet ps =
+	    PropertySetFactory.create(new ByteArrayInputStream(b));
+	Assert.assertTrue(ps.isDocumentSummaryInformation());
+
+	/* JUnit expects the arguments in the order (expected, actual). */
+	Assert.assertEquals(2, ps.getSectionCount());
+	Section s = (Section) ps.getSections().get(1);
+	Assert.assertEquals(new Integer(1200),
+			    s.getProperty(1));
+	Assert.assertEquals(new Long(4198897018L),
+			    s.getProperty(2));
+	Assert.assertEquals("MCon_Info zu Office bei Schreiner",
+			    s.getProperty(3));
+	Assert.assertEquals("petrovitsch@schreiner-online.de",
+			    s.getProperty(4));
+	Assert.assertEquals("Petrovitsch, Wilhelm",
+			    s.getProperty(5));
+    }
+
+
+
+    /**
+     * <p>Runs the test cases stand-alone.</p>
+     */
+    public static void main(String[] args)
+    {
+	System.setProperty("HPSF.testdata.path",
+			   "./src/testcases/org/apache/poi/hpsf/data");
+        junit.textui.TestRunner.run(TestUnicode.class);
+    }
+
+}
--- a/src/testcases/org/apache/poi/hpsf/basic/Util.java
+++ b/src/testcases/org/apache/poi/hpsf/basic/Util.java
@ -114,10 +114,33 @@ public class Util
     */
    public static POIFile[] readPOIFiles(final File poiFs)
 	throws FileNotFoundException, IOException
+    {
+	return readPOIFiles(poiFs, null);
+    }
+
+
+
+    /**
+     * <p>Reads a set of files from a POI filesystem and returns them
+     * as an array of {@link POIFile} instances. This method loads all
+     * files into memory and thus does not cope well with large POI
+     * filesystems.</p>
+     * 
+     * @param poiFs The name of the POI filesystem as seen by the
+     * operating system. (This is the "filename".)
+     *
+     * @param poiFiles The names of the POI files to be read.
+     *
+     * @return The POI files. The elements are ordered in the same way
+     * as the files in the POI filesystem.
+     */
+    public static POIFile[] readPOIFiles(final File poiFs,
+					 final String[] poiFiles)
+	throws FileNotFoundException, IOException
    {
 	final List files = new ArrayList();
 	POIFSReader r = new POIFSReader();
-	r.registerListener(new POIFSReaderListener()
+	POIFSReaderListener pfl = new POIFSReaderListener()
 	    {
 		public void processPOIFSReaderEvent(POIFSReaderEvent event)
 		{
@ -140,7 +163,17 @@ public class Util
 			throw new RuntimeException(ex.getMessage());
 		    }
 		}
-	    });
+	    };
+	if (poiFiles == null)
+	    /* Register the listener for all POI files. */
+	    r.registerListener(pfl);
+	else
+	    /* Register the listener for the specified POI files
+	     * only. */
+	    for (int i = 0; i < poiFiles.length; i++)
+		r.registerListener(pfl, poiFiles[i]);
+
+	/* Read the POI filesystem. */
 	r.read(new FileInputStream(poiFs));
 	POIFile[] result = new POIFile[files.size()];
 	for (int i = 0; i < result.length; i++)
--- a/src/testcases/org/apache/poi/hpsf/data/TestUnicode.xls
+++ b/src/testcases/org/apache/poi/hpsf/data/TestUnicode.xls