Work inspired by bug #48018 - get HWPF lists more consistent in read vs write, and preserve order as apparently that matters. Includes a fair number of list related unit tests, but not for everything

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@998943 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-09-20 14:26:49 +00:00
parent d71e92ace2
commit 8b2f2bcff6
6 changed files with 322 additions and 33 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.7-beta3" date="2010-??-??">
<action dev="poi-developers" type="fix">48018 - Improve HWPF handling of lists in documents read and then saved, by preserving order better</action>
<action dev="poi-developers" type="fix">49820 - Fix HWPF paragraph levels, so that outline levels can be properly fetched</action>
<action dev="poi-developers" type="fix">47271 - Avoid infinite loops on broken HWPF documents with a corrupt CHP style with a parent of itself</action>
<action dev="poi-developers" type="fix">49936 - Handle HWPF documents with problematic HeaderStories better</action>

View File

@ -91,7 +91,7 @@ public final class ListLevel
System.arraycopy(buf, offset, _rgbxchNums, 0, RGBXCH_NUMS_SIZE);
offset += RGBXCH_NUMS_SIZE;
_ixchFollow = buf[offset++];
_ixchFollow = buf[offset++];
_dxaSpace = LittleEndian.getInt(buf, offset);
offset += LittleEndian.INT_SIZE;
_dxaIndent = LittleEndian.getInt(buf, offset);
@ -216,10 +216,10 @@ public final class ListLevel
LittleEndian.putShort(buf, offset, _reserved);
offset += LittleEndian.SHORT_SIZE;
System.arraycopy(_grpprlChpx, 0, buf, offset, _cbGrpprlChpx);
offset += _cbGrpprlChpx;
System.arraycopy(_grpprlPapx, 0, buf, offset, _cbGrpprlPapx);
offset += _cbGrpprlPapx;
System.arraycopy(_grpprlChpx, 0, buf, offset, _cbGrpprlChpx);
offset += _cbGrpprlChpx;
if (_numberText == null) {
// TODO - write junit to test this flow

View File

@ -17,20 +17,23 @@
package org.apache.poi.hwpf.model;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.hwpf.model.io.*;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
/**
* @author Ryan Ackley
*/
@ -40,8 +43,8 @@ public final class ListTables
private static final int LIST_FORMAT_OVERRIDE_SIZE = 16;
private static POILogger log = POILogFactory.getLogger(ListTables.class);
HashMap _listMap = new HashMap();
ArrayList _overrideList = new ArrayList();
ListMap _listMap = new ListMap();
ArrayList<ListFormatOverride> _overrideList = new ArrayList<ListFormatOverride>();
public ListTables()
{
@ -109,22 +112,17 @@ public final class ListTables
public void writeListDataTo(HWPFOutputStream tableStream)
throws IOException
{
Integer[] intList = (Integer[])_listMap.keySet().toArray(new Integer[0]);
int listSize = _listMap.size();
// use this stream as a buffer for the levels since their size varies.
ByteArrayOutputStream levelBuf = new ByteArrayOutputStream();
// use a byte array for the lists because we know their size.
byte[] listBuf = new byte[intList.length * LIST_DATA_SIZE];
byte[] shortHolder = new byte[2];
LittleEndian.putShort(shortHolder, (short)intList.length);
LittleEndian.putShort(shortHolder, (short)listSize);
tableStream.write(shortHolder);
for (int x = 0; x < intList.length; x++)
{
ListData lst = (ListData)_listMap.get(intList[x]);
for(Integer x : _listMap.sortedKeys()) {
ListData lst = _listMap.get(x);
tableStream.write(lst.toByteArray());
ListLevel[] lvls = lst.getLevels();
for (int y = 0; y < lvls.length; y++)
@ -150,7 +148,7 @@ public final class ListTables
for (int x = 0; x < size; x++)
{
ListFormatOverride lfo = (ListFormatOverride)_overrideList.get(x);
ListFormatOverride lfo = _overrideList.get(x);
tableStream.write(lfo.toByteArray());
ListFormatOverrideLevel[] lfolvls = lfo.getLevelOverrides();
for (int y = 0; y < lfolvls.length; y++)
@ -164,7 +162,7 @@ public final class ListTables
public ListFormatOverride getOverride(int lfoIndex)
{
return (ListFormatOverride)_overrideList.get(lfoIndex - 1);
return _overrideList.get(lfoIndex - 1);
}
public int getOverrideIndexFromListID(int lstid)
@ -173,7 +171,7 @@ public final class ListTables
int size = _overrideList.size();
for (int x = 0; x < size; x++)
{
ListFormatOverride next = (ListFormatOverride)_overrideList.get(x);
ListFormatOverride next = _overrideList.get(x);
if (next.getLsid() == lstid)
{
// 1-based index I think
@ -190,7 +188,7 @@ public final class ListTables
public ListLevel getLevel(int listID, int level)
{
ListData lst = (ListData)_listMap.get(Integer.valueOf(listID));
ListData lst = _listMap.get(Integer.valueOf(listID));
if(level < lst.numLevels()) {
ListLevel lvl = lst.getLevels()[level];
return lvl;
@ -201,7 +199,7 @@ public final class ListTables
public ListData getListData(int listID)
{
return (ListData) _listMap.get(Integer.valueOf(listID));
return _listMap.get(Integer.valueOf(listID));
}
public boolean equals(Object obj)
@ -215,12 +213,12 @@ public final class ListTables
if (_listMap.size() == tables._listMap.size())
{
Iterator it = _listMap.keySet().iterator();
Iterator<Integer> it = _listMap.keySet().iterator();
while (it.hasNext())
{
Object key = it.next();
ListData lst1 = (ListData)_listMap.get(key);
ListData lst2 = (ListData)tables._listMap.get(key);
Integer key = it.next();
ListData lst1 = _listMap.get(key);
ListData lst2 = tables._listMap.get(key);
if (!lst1.equals(lst2))
{
return false;
@ -241,4 +239,70 @@ public final class ListTables
}
return false;
}
private static class ListMap implements Map<Integer, ListData> {
private ArrayList<Integer> keyList = new ArrayList<Integer>();
private HashMap<Integer,ListData> parent = new HashMap<Integer,ListData>();
private ListMap() {}
public void clear() {
keyList.clear();
parent.clear();
}
public boolean containsKey(Object key) {
return parent.containsKey(key);
}
public boolean containsValue(Object value) {
return parent.containsValue(value);
}
public ListData get(Object key) {
return parent.get(key);
}
public boolean isEmpty() {
return parent.isEmpty();
}
public ListData put(Integer key, ListData value) {
keyList.add(key);
return parent.put(key, value);
}
public void putAll(Map<? extends Integer, ? extends ListData> map) {
for(Entry<? extends Integer, ? extends ListData> entry : map.entrySet()) {
put(entry.getKey(), entry.getValue());
}
}
public ListData remove(Object key) {
keyList.remove(key);
return parent.remove(key);
}
public int size() {
return parent.size();
}
public Set<Entry<Integer, ListData>> entrySet() {
throw new IllegalStateException("Use sortedKeys() + get() instead");
}
public List<Integer> sortedKeys() {
return Collections.unmodifiableList(keyList);
}
public Set<Integer> keySet() {
throw new IllegalStateException("Use sortedKeys() instead");
}
public Collection<ListData> values() {
ArrayList<ListData> values = new ArrayList<ListData>();
for(Integer key : keyList) {
values.add(parent.get(key));
}
return values;
}
}
}

View File

@ -434,16 +434,30 @@ public class Paragraph extends Range implements Cloneable {
_papx.updateSprm(SPRM_DCS, dcs.toShort());
}
/**
* Returns the ilfo, an index to the document's hpllfo, which
* describes the automatic number formatting of the paragraph.
* A value of zero means it isn't numbered.
*/
public int getIlfo()
{
return _props.getIlfo();
}
/**
* Returns the multi-level indent for the paragraph. Will be
* zero for non-list paragraphs, and the first level of any
* list. Subsequent levels in hold values 1-8.
*/
public int getIlvl()
{
return _props.getIlvl();
}
/**
* Returns the heading level (1-8), or 9 if the paragraph
* isn't in a heading style.
*/
public int getLvl()
{
return _props.getLvl();

View File

@ -0,0 +1,210 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hwpf.usermodel;
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFTestDataSamples;
/**
* Tests for our handling of lists
*/
public final class TestLists extends TestCase {
public void testBasics() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
Range r = doc.getRange();
assertEquals(40, r.numParagraphs());
assertEquals("Heading Level 1\r", r.getParagraph(0).text());
assertEquals("This document has different lists in it for testing\r", r.getParagraph(1).text());
assertEquals("The end!\r", r.getParagraph(38).text());
assertEquals("\r", r.getParagraph(39).text());
assertEquals(0, r.getParagraph(0).getLvl());
assertEquals(9, r.getParagraph(1).getLvl());
assertEquals(9, r.getParagraph(38).getLvl());
assertEquals(9, r.getParagraph(39).getLvl());
}
public void testUnorderedLists() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
Range r = doc.getRange();
assertEquals(40, r.numParagraphs());
// Normal bullet points
assertEquals("This document has different lists in it for testing\r", r.getParagraph(1).text());
assertEquals("Unordered list 1\r", r.getParagraph(2).text());
assertEquals("UL 2\r", r.getParagraph(3).text());
assertEquals("UL 3\r", r.getParagraph(4).text());
assertEquals("Next up is an ordered list:\r", r.getParagraph(5).text());
assertEquals(9, r.getParagraph(1).getLvl());
assertEquals(9, r.getParagraph(2).getLvl());
assertEquals(9, r.getParagraph(3).getLvl());
assertEquals(9, r.getParagraph(4).getLvl());
assertEquals(9, r.getParagraph(5).getLvl());
assertEquals(0, r.getParagraph(1).getIlvl());
assertEquals(0, r.getParagraph(2).getIlvl());
assertEquals(0, r.getParagraph(3).getIlvl());
assertEquals(0, r.getParagraph(4).getIlvl());
assertEquals(0, r.getParagraph(5).getIlvl());
// Tick bullets
assertEquals("Now for an un-ordered list with a different bullet style:\r", r.getParagraph(9).text());
assertEquals("Tick 1\r", r.getParagraph(10).text());
assertEquals("Tick 2\r", r.getParagraph(11).text());
assertEquals("Multi-level un-ordered list:\r", r.getParagraph(12).text());
assertEquals(9, r.getParagraph(9).getLvl());
assertEquals(9, r.getParagraph(10).getLvl());
assertEquals(9, r.getParagraph(11).getLvl());
assertEquals(9, r.getParagraph(12).getLvl());
assertEquals(0, r.getParagraph(9).getIlvl());
assertEquals(0, r.getParagraph(10).getIlvl());
assertEquals(0, r.getParagraph(11).getIlvl());
assertEquals(0, r.getParagraph(12).getIlvl());
// TODO Test for tick not bullet
}
public void testOrderedLists() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
Range r = doc.getRange();
assertEquals(40, r.numParagraphs());
assertEquals("Next up is an ordered list:\r", r.getParagraph(5).text());
assertEquals("Ordered list 1\r", r.getParagraph(6).text());
assertEquals("OL 2\r", r.getParagraph(7).text());
assertEquals("OL 3\r", r.getParagraph(8).text());
assertEquals("Now for an un-ordered list with a different bullet style:\r", r.getParagraph(9).text());
assertEquals(9, r.getParagraph(5).getLvl());
assertEquals(9, r.getParagraph(6).getLvl());
assertEquals(9, r.getParagraph(7).getLvl());
assertEquals(9, r.getParagraph(8).getLvl());
assertEquals(9, r.getParagraph(9).getLvl());
assertEquals(0, r.getParagraph(5).getIlvl());
assertEquals(0, r.getParagraph(6).getIlvl());
assertEquals(0, r.getParagraph(7).getIlvl());
assertEquals(0, r.getParagraph(8).getIlvl());
assertEquals(0, r.getParagraph(9).getIlvl());
}
public void testMultiLevelLists() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
Range r = doc.getRange();
assertEquals(40, r.numParagraphs());
assertEquals("Multi-level un-ordered list:\r", r.getParagraph(12).text());
assertEquals("ML 1:1\r", r.getParagraph(13).text());
assertEquals("ML 1:2\r", r.getParagraph(14).text());
assertEquals("ML 2:1\r", r.getParagraph(15).text());
assertEquals("ML 2:2\r", r.getParagraph(16).text());
assertEquals("ML 2:3\r", r.getParagraph(17).text());
assertEquals("ML 3:1\r", r.getParagraph(18).text());
assertEquals("ML 4:1\r", r.getParagraph(19).text());
assertEquals("ML 5:1\r", r.getParagraph(20).text());
assertEquals("ML 5:2\r", r.getParagraph(21).text());
assertEquals("ML 2:4\r", r.getParagraph(22).text());
assertEquals("ML 1:3\r", r.getParagraph(23).text());
assertEquals("Multi-level ordered list:\r", r.getParagraph(24).text());
assertEquals("OL 1\r", r.getParagraph(25).text());
assertEquals("OL 2\r", r.getParagraph(26).text());
assertEquals("OL 2.1\r", r.getParagraph(27).text());
assertEquals("OL 2.2\r", r.getParagraph(28).text());
assertEquals("OL 2.2.1\r", r.getParagraph(29).text());
assertEquals("OL 2.2.2\r", r.getParagraph(30).text());
assertEquals("OL 2.2.2.1\r", r.getParagraph(31).text());
assertEquals("OL 2.2.3\r", r.getParagraph(32).text());
assertEquals("OL 3\r", r.getParagraph(33).text());
assertEquals("Finally we want some indents, to tell the difference\r", r.getParagraph(34).text());
for(int i=12; i<=34; i++) {
assertEquals(9, r.getParagraph(12).getLvl());
}
assertEquals(0, r.getParagraph(12).getIlvl());
assertEquals(0, r.getParagraph(13).getIlvl());
assertEquals(0, r.getParagraph(14).getIlvl());
assertEquals(1, r.getParagraph(15).getIlvl());
assertEquals(1, r.getParagraph(16).getIlvl());
assertEquals(1, r.getParagraph(17).getIlvl());
assertEquals(2, r.getParagraph(18).getIlvl());
assertEquals(3, r.getParagraph(19).getIlvl());
assertEquals(4, r.getParagraph(20).getIlvl());
assertEquals(4, r.getParagraph(21).getIlvl());
assertEquals(1, r.getParagraph(22).getIlvl());
assertEquals(0, r.getParagraph(23).getIlvl());
assertEquals(0, r.getParagraph(24).getIlvl());
assertEquals(0, r.getParagraph(25).getIlvl());
assertEquals(0, r.getParagraph(26).getIlvl());
assertEquals(1, r.getParagraph(27).getIlvl());
assertEquals(1, r.getParagraph(28).getIlvl());
assertEquals(2, r.getParagraph(29).getIlvl());
assertEquals(2, r.getParagraph(30).getIlvl());
assertEquals(3, r.getParagraph(31).getIlvl());
assertEquals(2, r.getParagraph(32).getIlvl());
assertEquals(0, r.getParagraph(33).getIlvl());
assertEquals(0, r.getParagraph(34).getIlvl());
}
public void testIndentedText() {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
Range r = doc.getRange();
assertEquals(40, r.numParagraphs());
assertEquals("Finally we want some indents, to tell the difference\r", r.getParagraph(34).text());
assertEquals("Indented once\r", r.getParagraph(35).text());
assertEquals("Indented twice\r", r.getParagraph(36).text());
assertEquals("Indented three times\r", r.getParagraph(37).text());
assertEquals("The end!\r", r.getParagraph(38).text());
assertEquals(9, r.getParagraph(34).getLvl());
assertEquals(9, r.getParagraph(35).getLvl());
assertEquals(9, r.getParagraph(36).getLvl());
assertEquals(9, r.getParagraph(37).getLvl());
assertEquals(9, r.getParagraph(38).getLvl());
assertEquals(9, r.getParagraph(39).getLvl());
assertEquals(0, r.getParagraph(34).getIlvl());
assertEquals(0, r.getParagraph(35).getIlvl());
assertEquals(0, r.getParagraph(36).getIlvl());
assertEquals(0, r.getParagraph(37).getIlvl());
assertEquals(0, r.getParagraph(38).getIlvl());
assertEquals(0, r.getParagraph(39).getIlvl());
// TODO Test the indent
}
public void testWriteRead() throws IOException {
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
doc = HWPFTestDataSamples.writeOutAndReadBack(doc);
Range r = doc.getRange();
// Check a couple at random
assertEquals(4, r.getParagraph(21).getIlvl());
assertEquals(1, r.getParagraph(22).getIlvl());
assertEquals(0, r.getParagraph(23).getIlvl());
}
}

Binary file not shown.