From 87f42602fe6860209901ae0e13d1663656b7e4a0 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Thu, 21 Jul 2011 04:25:39 +0000 Subject: [PATCH] more compact HTML in Word-to-HTML converter git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1149035 13f79535-47bb-0310-9956-ffa450edef68 --- .../hwpf/converter/WordToHtmlConverter.java | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java index 032d732c11..3fff0aa4bc 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java @@ -45,6 +45,8 @@ import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; import org.w3c.dom.Document; import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import org.w3c.dom.Text; import static org.apache.poi.hwpf.converter.AbstractWordUtils.TWIPS_PER_INCH; @@ -523,6 +525,32 @@ public class WordToHtmlConverter extends AbstractWordConverter htmlDocumentFacade.getOrCreateCssClass( pElement.getTagName(), "p", style.toString() ) ); + { + // compact spans + NodeList childNodes = pElement.getChildNodes(); + for ( int i = 0; i < childNodes.getLength() - 1; i++ ) + { + Node child1 = childNodes.item( i ); + Node child2 = childNodes.item( i + 1 ); + if ( child1.getNodeType() != Node.ELEMENT_NODE + || child2.getNodeType() != Node.ELEMENT_NODE + || !WordToHtmlUtils.equals( "span", + ( (Element) child1 ).getTagName() ) + || !WordToHtmlUtils.equals( "span", + ( (Element) child2 ).getTagName() ) + || !WordToHtmlUtils.equals( + ( (Element) child1 ).getAttribute( "class" ), + ( (Element) child2 ).getAttribute( "class" ) ) ) + continue; + + // merge + while ( child2.getChildNodes().getLength() > 0 ) + child1.appendChild( child2.getFirstChild() ); + child2.getParentNode().removeChild( child2 ); + i--; + } + } + return; }