[github-389] Insert paragraphs and tables into XWPFDocuments recursively. Thanks to Anton Oellerer. This closes #389

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1904680 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
PJ Fanning 2022-10-18 11:49:34 +00:00
parent aa63b125d3
commit 4c3a0b4e93
2 changed files with 197 additions and 63 deletions

View File

@ -27,15 +27,16 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Spliterator;
import javax.xml.namespace.QName;
import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -682,6 +683,7 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody {
*/
@Override
public XWPFParagraph insertNewParagraph(XmlCursor cursor) {
Deque<XmlObject> path = getPathToObject(cursor);
String uri = CTP.type.getName().getNamespaceURI();
/*
* TODO DO not use a coded constant, find the constant in the OOXML
@ -696,6 +698,63 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody {
cursor.toParent();
CTP p = (CTP) cursor.getObject();
XWPFParagraph newP = new XWPFParagraph(p, this);
insertIntoParentElement(newP, path);
cursor.toCursor(newP.getCTP().newCursor());
cursor.toEndToken();
return newP;
}
/**
 * Creates a new, empty table element at the position of the given cursor
 * and registers the resulting {@link XWPFTable} with its parent (the
 * document body or an enclosing table cell).
 *
 * @param cursor position at which the {@code tbl} element is created; on
 *               return it is positioned on the END token of the new table
 * @return the newly created table
 */
@Override
public XWPFTable insertNewTbl(XmlCursor cursor) {
    // Capture the ancestor chain before the cursor is moved by the insertion.
    Deque<XmlObject> path = getPathToObject(cursor);
    String uri = CTTbl.type.getName().getNamespaceURI();
    String localPart = "tbl";
    cursor.beginElement(localPart, uri);
    // step back out of the new element so getObject() yields the table itself
    cursor.toParent();
    CTTbl t = (CTTbl) cursor.getObject();
    XWPFTable newT = new XWPFTable(t, this);
    insertIntoParentElement(newT, path);
    // The helper cursor is only needed to reposition the caller's cursor;
    // close it afterwards instead of leaking it.
    try (XmlCursor tableCursor = newT.getCTTbl().newCursor()) {
        cursor.toCursor(tableCursor);
        cursor.toEndToken();
    }
    return newT;
}
/**
 * Collects the ancestors of the cursor position, outermost first.
 * <p>
 * A copy of the cursor is walked upwards; every visited parent is pushed
 * onto the front of the result. Walking stops once the document body has
 * been recorded at the head, or when there is no further parent.
 *
 * @param cursor the position to start from; it is not moved
 * @return the ancestor objects, the outermost one at the head
 */
private Deque<XmlObject> getPathToObject(XmlCursor cursor) {
    Deque<XmlObject> ancestors = new LinkedList<>();
    try (XmlCursor walker = cursor.newCursor()) {
        for (;;) {
            if (!walker.toParent()) {
                break;
            }
            if (ancestors.peekFirst() == this.ctDocument.getBody()) {
                // body is already at the head, nothing above it is of interest
                break;
            }
            ancestors.addFirst(walker.getObject());
        }
    }
    return ancestors;
}
/**
 * Registers a freshly created body element in the model lists of its
 * parent: the document itself when the element sits directly in the body,
 * otherwise the table cell reached by following {@code path}.
 *
 * @param iBodyElement the new paragraph or table
 * @param path         ancestors of the insertion point, outermost first
 *                     (see {@code getPathToObject}); entries are consumed
 */
private void insertIntoParentElement(IBodyElement iBodyElement, Deque<XmlObject> path) {
    // first object is always the body; it only needs to be removed
    path.pop();
    if (path.isEmpty()) {
        // element lives directly in the document body
        if (iBodyElement instanceof XWPFParagraph) {
            insertIntoParagraphsAndElements((XWPFParagraph) iBodyElement, paragraphs, bodyElements);
        } else if (iBodyElement instanceof XWPFTable) {
            insertIntoTablesAndElements((XWPFTable) iBodyElement, tables, bodyElements);
        }
        return;
    }
    // the second entry is the top-level table containing the insertion point
    CTTbl ctTbl = (CTTbl) path.pop();
    for (XWPFTable xwpfTable : tables) {
        if (ctTbl == xwpfTable.getCTTbl()) {
            insertElementIntoTable(xwpfTable, iBodyElement, path);
            // the owning table has been handled, no need to scan further
            return;
        }
    }
}
/**
 * Adds a new paragraph to both lists that track it.
 *
 * @param newP         the paragraph to register
 * @param paragraphs   the paragraph list to update
 * @param bodyElements the list of all body elements to update
 */
private void insertIntoParagraphsAndElements(XWPFParagraph newP, List<XWPFParagraph> paragraphs, List<IBodyElement> bodyElements) {
    // typed list first, then the mixed body-element list
    this.insertIntoParagraphs(newP, paragraphs);
    this.insertIntoBodyElements(newP, bodyElements);
}
private void insertIntoParagraphs(XWPFParagraph newP, List<XWPFParagraph> paragraphs) {
try (XmlCursor cursor = newP.getCTP().newCursor()) {
XmlObject p = cursor.getObject();
XmlObject o = null;
/*
* move the cursor to the previous element until a) the next
@ -718,12 +777,50 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody {
int pos = paragraphs.indexOf(getParagraph((CTP) o)) + 1;
paragraphs.add(pos, newP);
}
}
}
/**
 * Adds a new table to both lists that track it.
 *
 * @param newT         the table to register
 * @param tables       the table list to update
 * @param bodyElements the list of all body elements to update
 */
private void insertIntoTablesAndElements(XWPFTable newT, List<XWPFTable> tables, List<IBodyElement> bodyElements) {
    // typed list first, then the mixed body-element list
    this.insertIntoTables(newT, tables);
    this.insertIntoBodyElements(newT, bodyElements);
}
/**
 * Inserts {@code newT} into {@code tables} at the index matching its
 * position in the XML, i.e. directly after the closest preceding sibling
 * table, or at index 0 when no such sibling exists.
 *
 * @param newT   the table that was just created in the XML
 * @param tables the table list of the parent (document or table cell)
 */
private void insertIntoTables(XWPFTable newT, List<XWPFTable> tables) {
    try (XmlCursor cursor = newT.getCTTbl().newCursor()) {
        XmlObject o = null;
        /*
         * move the cursor to the previous element until a) the previous
         * table is found or b) all elements have been passed
         */
        while (!(o instanceof CTTbl) && (cursor.toPrevSibling())) {
            o = cursor.getObject();
        }
        /*
         * if no preceding table was found, the new table is the first
         * table of its parent. Otherwise it goes right behind the
         * preceding table.
         *
         * The predecessor is looked up in the list that is being updated
         * rather than through the document's getTable(...): this method is
         * also called with a table cell's list, whose entries the
         * document-level lookup is not expected to know about.
         */
        int pos = 0;
        if (o instanceof CTTbl) {
            for (int i = 0; i < tables.size(); i++) {
                if (tables.get(i).getCTTbl() == o) {
                    pos = i + 1;
                    break;
                }
            }
        }
        tables.add(pos, newT);
    }
}
private void insertIntoBodyElements(IBodyElement iBodyElement, List<IBodyElement> bodyElements) {
/*
* create a new cursor, that points to the START token of the just
* inserted paragraph
*/
try (XmlCursor newParaPos = p.newCursor()) {
try (XmlCursor cursor = getNewCursor(iBodyElement).orElseThrow(NoSuchElementException::new);
XmlCursor newParaPos = getNewCursor(iBodyElement).orElseThrow(NoSuchElementException::new)) {
XmlObject o;
/*
* Calculate the paragraphs index in the list of all body
* elements
@ -736,44 +833,52 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody {
i++;
}
}
bodyElements.add(i, newP);
bodyElements.add(i, iBodyElement);
cursor.toCursor(newParaPos);
cursor.toEndToken();
return newP;
} catch (NoSuchElementException ignored) {
//We could not open a cursor to the ibody element
}
}
@Override
public XWPFTable insertNewTbl(XmlCursor cursor) {
String uri = CTTbl.type.getName().getNamespaceURI();
String localPart = "tbl";
cursor.beginElement(localPart, uri);
cursor.toParent();
CTTbl t = (CTTbl) cursor.getObject();
XWPFTable newT = new XWPFTable(t, this);
XmlObject o = null;
while (!(o instanceof CTTbl) && (cursor.toPrevSibling())) {
o = cursor.getObject();
/**
 * Opens a cursor on the XML object backing the given body element.
 *
 * @param iBodyElement a paragraph or a table
 * @return the new cursor, or an empty Optional when the element is neither
 *         a paragraph nor a table (or no cursor was returned)
 */
private Optional<XmlCursor> getNewCursor(IBodyElement iBodyElement) {
    final XmlCursor opened;
    if (iBodyElement instanceof XWPFParagraph) {
        XWPFParagraph paragraph = (XWPFParagraph) iBodyElement;
        opened = paragraph.getCTP().newCursor();
    } else if (iBodyElement instanceof XWPFTable) {
        XWPFTable table = (XWPFTable) iBodyElement;
        opened = table.getCTTbl().newCursor();
    } else {
        // unsupported kind of body element
        return Optional.empty();
    }
    return Optional.ofNullable(opened);
}
/**
 * Descends from a table into the row named by the head of {@code path}
 * and continues the insertion there.
 *
 * @param xwpfTable    the table containing the insertion point
 * @param iBodyElement the new paragraph or table
 * @param path         remaining ancestors; the head must be the row
 */
private void insertElementIntoTable(XWPFTable xwpfTable, IBodyElement iBodyElement, Deque<XmlObject> path) {
    final CTRow wantedRow = (CTRow) path.pop();
    for (XWPFTableRow candidate : xwpfTable.getRows()) {
        if (candidate.getCtRow() != wantedRow) {
            continue;
        }
        insertElementIntoRow(candidate, iBodyElement, path);
    }
}
/**
 * Descends from a row into the cell named by the head of {@code path}
 * and continues the insertion there.
 *
 * @param tableRow     the row containing the insertion point
 * @param iBodyElement the new paragraph or table
 * @param path         remaining ancestors; the head must be the cell
 */
private void insertElementIntoRow(XWPFTableRow tableRow, IBodyElement iBodyElement, Deque<XmlObject> path) {
    final CTTc wantedCell = (CTTc) path.pop();
    for (XWPFTableCell candidate : tableRow.getTableCells()) {
        if (candidate.getCTTc() != wantedCell) {
            continue;
        }
        insertElementIntoCell(candidate, iBodyElement, path);
    }
}
private void insertElementIntoCell(XWPFTableCell tableCell, IBodyElement iBodyElement, Deque<XmlObject> path) {
if (path.isEmpty()) {
if (iBodyElement instanceof XWPFParagraph) {
insertIntoParagraphsAndElements((XWPFParagraph) iBodyElement, tableCell.paragraphs, tableCell.bodyElements);
} else if (iBodyElement instanceof XWPFTable) {
insertIntoTablesAndElements((XWPFTable) iBodyElement, tableCell.tables, tableCell.bodyElements);
}
if (!(o instanceof CTTbl)) {
tables.add(0, newT);
} else {
int pos = tables.indexOf(getTable((CTTbl) o)) + 1;
tables.add(pos, newT);
}
int i = 0;
try (XmlCursor tableCursor = t.newCursor()) {
cursor.toCursor(tableCursor);
while (cursor.toPrevSibling()) {
o = cursor.getObject();
if (o instanceof CTP || o instanceof CTTbl) {
i++;
}
}
bodyElements.add(i, newT);
cursor.toCursor(tableCursor);
cursor.toEndToken();
return newT;
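// another table -- NOTE(review): path holds XmlObject entries (a CTTbl at this point, cf. the (CTTbl) cast in insertIntoParentElement), so the (XWPFTable) cast below looks like a ClassCastException for nested tables; the CTTbl should probably be resolved against tableCell.tables first -- confirm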
insertElementIntoTable((XWPFTable) path.pop(), iBodyElement, path);
}
}

View File

@ -44,6 +44,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.poi.xwpf.usermodel.XWPFTableCell;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlException;
@ -196,20 +197,48 @@ class TestXWPFBugs {
}
}
/**
 * A table inserted at the first top-level paragraph must show up as the
 * document's first table, with the paragraph still first in its own list.
 */
@Test
void insertTableDirectlyIntoBody() throws IOException {
    try (XWPFDocument doc = new XWPFDocument(samples.openResourceAsStream("bug66312.docx"))) {
        XWPFParagraph anchor = doc.getParagraphArray(0);
        insertTable(anchor, doc);

        XWPFTable firstTable = doc.getTableArray(0);
        String firstCellText = firstTable.getRow(0).getCell(0).getText();
        assertEquals("Hello", firstCellText);

        String firstParagraphText = doc.getParagraphArray(0).getText();
        assertEquals("World", firstParagraphText);
    }
}
/**
 * A paragraph inserted at a paragraph inside a table cell must be
 * registered in that cell, ahead of the existing paragraph.
 */
@Test
void insertParagraphIntoTable() throws IOException {
    try (XWPFDocument doc = new XWPFDocument(samples.openResourceAsStream("bug66312.docx"))) {
        XWPFTableCell firstCell = doc.getTableArray(0).getRow(0).getCell(0);
        XWPFParagraph anchor = firstCell.getParagraphArray(0);
        insertParagraph(anchor, doc);

        // The issue reporter expects two paragraphs in the cell
        // ('Hello' and 'World' respectively).
        String first = firstCell.getParagraphArray(0).getText();
        assertEquals("Hello", first);
        String second = firstCell.getParagraphArray(1).getText();
        assertEquals("World", second);
    }
}
/**
 * A table inserted at a paragraph inside a table cell must be registered
 * in that cell's own table list, leaving the cell's paragraph in place.
 */
@Test
void insertTableIntoTable() throws IOException {
    try (XWPFDocument doc = new XWPFDocument(samples.openResourceAsStream("bug66312.docx"))) {
        XWPFTableCell outerCell = doc.getTableArray(0).getRow(0).getCell(0);
        XWPFParagraph anchor = outerCell.getParagraphArray(0);
        insertTable(anchor, doc);

        XWPFTable nested = outerCell.getTableArray(0);
        String nestedCellText = nested.getRow(0).getCell(0).getText();
        assertEquals("Hello", nestedCellText);

        String paragraphText = outerCell.getParagraphArray(0).getText();
        assertEquals("World", paragraphText);
    }
}
/**
 * Inserts a new paragraph with the text "Hello" at the position of the
 * given paragraph.
 *
 * @param xwpfParagraph the paragraph marking the insertion point
 * @param document      the document that owns the paragraph
 */
public static void insertParagraph(XWPFParagraph xwpfParagraph, XWPFDocument document) {
    // the cursor is only needed for the insertion; close it afterwards
    try (XmlCursor xmlCursor = xwpfParagraph.getCTP().newCursor()) {
        XWPFParagraph xwpfParagraph2 = document.insertNewParagraph(xmlCursor);
        xwpfParagraph2.createRun().setText("Hello");
    }
}
/**
 * Inserts a new table at the position of the given paragraph and writes
 * "Hello" into its first cell.
 *
 * @param xwpfParagraph the paragraph marking the insertion point
 * @param document      the document that owns the paragraph
 */
public static void insertTable(XWPFParagraph xwpfParagraph, XWPFDocument document) {
    // the cursor is only needed for the insertion; close it afterwards
    try (XmlCursor xmlCursor = xwpfParagraph.getCTP().newCursor()) {
        XWPFTable xwpfTable = document.insertNewTbl(xmlCursor);
        xwpfTable.getRow(0).getCell(0).setText("Hello");
    }
}
}