From 67710377a853a851b69ace7cc29a20c6d4246f11 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Tue, 19 Jul 2011 10:51:23 +0000 Subject: [PATCH] compact HTML output of WordToHtmlConverter git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148269 13f79535-47bb-0310-9956-ffa450edef68 --- .../hwpf/converter/WordToHtmlConverter.java | 73 ++++++++++------ .../poi/hwpf/converter/WordToHtmlUtils.java | 86 +++++++++---------- .../converter/TestWordToHtmlConverter.java | 2 +- 3 files changed, 89 insertions(+), 72 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java index b1656aeb6c..f81be74c82 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java @@ -70,20 +70,20 @@ public class WordToHtmlConverter extends AbstractWordConverter float bottomMargin = section.getMarginBottom() / TWIPS_PER_INCH; String style = "margin: " + topMargin + "in " + rightMargin + "in " - + bottomMargin + "in " + leftMargin + "in; "; + + bottomMargin + "in " + leftMargin + "in;"; if ( section.getNumColumns() > 1 ) { - style += "column-count: " + ( section.getNumColumns() ) + "; "; + style += "column-count: " + ( section.getNumColumns() ) + ";"; if ( section.isColumnsEvenlySpaced() ) { float distance = section.getDistanceBetweenColumns() / TWIPS_PER_INCH; - style += "column-gap: " + distance + "in; "; + style += "column-gap: " + distance + "in;"; } else { - style += "column-gap: 0.25in; "; + style += "column-gap: 0.25in;"; } } return style; @@ -160,6 +160,7 @@ public class WordToHtmlConverter extends AbstractWordConverter public Document getDocument() { + htmlDocumentFacade.updateStylesheet(); return htmlDocumentFacade.getDocument(); } @@ -178,24 +179,25 @@ public class WordToHtmlConverter extends AbstractWordConverter && !WordToHtmlUtils.equals( triplet.fontName, blockProperies.pFontName ) ) { - style.append( "font-family: " + triplet.fontName + "; " ); + style.append( "font-family:" + triplet.fontName + ";" ); } if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize ) { - style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " ); + style.append( "font-size:" + characterRun.getFontSize() / 2 + "pt;" ); } if ( triplet.bold ) { - style.append( "font-weight: bold; " ); + style.append( "font-weight:bold;" ); } if ( triplet.italic ) { - style.append( "font-style: italic; " ); + style.append( "font-style:italic;" ); } WordToHtmlUtils.addCharactersProperties( characterRun, style ); if ( style.length() != 0 ) - span.setAttribute( "style", style.toString() ); + span.setAttribute( "class", htmlDocumentFacade.getOrCreateCssClass( + span.getTagName(), "s", style.toString() ) ); Text textNode = htmlDocumentFacade.createText( text ); span.appendChild( textNode ); @@ -312,22 +314,28 @@ public class WordToHtmlConverter extends AbstractWordConverter float visibleHeight = Math.max( 0, imageHeight - cropTop - cropBottom ); - root = htmlDocumentFacade.document.createElement( "div" ); - root.setAttribute( "style", "vertical-align:text-bottom;width:" - + visibleWidth + "in;height:" + visibleHeight + "in;" ); + root = htmlDocumentFacade.createBlock(); + root.setAttribute( "class", htmlDocumentFacade.getOrCreateCssClass( + root.getTagName(), "d", "vertical-align:text-bottom;width:" + + visibleWidth + "in;height:" + visibleHeight + + "in;" ) ); // complex - Element inner = htmlDocumentFacade.document.createElement( "div" ); - inner.setAttribute( "style", "position:relative;width:" - + visibleWidth + "in;height:" + visibleHeight - + "in;overflow:hidden;" ); + Element inner = htmlDocumentFacade.createBlock(); + inner.setAttribute( "class", htmlDocumentFacade + .getOrCreateCssClass( inner.getTagName(), "d", + "position:relative;width:" + visibleWidth + + "in;height:" + visibleHeight + + "in;overflow:hidden;" ) ); root.appendChild( inner ); Element image = htmlDocumentFacade.document.createElement( "img" ); image.setAttribute( "src", imageSourcePath ); - image.setAttribute( "style", "position:absolute;left:-" + cropLeft - + ";top:-" + cropTop + ";width:" + imageWidth - + "in;height:" + imageHeight + "in;" ); + image.setAttribute( "class", htmlDocumentFacade + .getOrCreateCssClass( image.getTagName(), "i", + "position:absolute;left:-" + cropLeft + ";top:-" + + cropTop + ";width:" + imageWidth + + "in;height:" + imageHeight + "in;" ) ); inner.appendChild( image ); style.append( "overflow:hidden;" ); @@ -414,7 +422,10 @@ public class WordToHtmlConverter extends AbstractWordConverter } if ( style.length() > 0 ) - pElement.setAttribute( "style", style.toString() ); + pElement.setAttribute( + "class", + htmlDocumentFacade.getOrCreateCssClass( + pElement.getTagName(), "p", style.toString() ) ); return; } @@ -422,8 +433,9 @@ public class WordToHtmlConverter extends AbstractWordConverter protected void processSection( HWPFDocumentCore wordDocument, Section section, int sectionCounter ) { - Element div = htmlDocumentFacade.document.createElement( "div" ); - div.setAttribute( "style", getSectionStyle( section ) ); + Element div = htmlDocumentFacade.createBlock(); + div.setAttribute( "class", htmlDocumentFacade.getOrCreateCssClass( + div.getTagName(), "d", getSectionStyle( section ) ) ); htmlDocumentFacade.body.appendChild( div ); processSectionParagraphes( wordDocument, div, section, @@ -434,8 +446,9 @@ public class WordToHtmlConverter extends AbstractWordConverter protected void processSingleSection( HWPFDocumentCore wordDocument, Section section ) { - htmlDocumentFacade.body.setAttribute( "style", - getSectionStyle( section ) ); + htmlDocumentFacade.body + .setAttribute( "class", htmlDocumentFacade.getOrCreateCssClass( + "body", "b", getSectionStyle( section ) ) ); processSectionParagraphes( wordDocument, htmlDocumentFacade.body, section, Integer.MIN_VALUE ); @@ -538,15 +551,19 @@ public class WordToHtmlConverter extends AbstractWordConverter .createParagraph() ); } if ( tableCellStyle.length() > 0 ) - tableCellElement.setAttribute( "style", - tableCellStyle.toString() ); + tableCellElement.setAttribute( "class", htmlDocumentFacade + .getOrCreateCssClass( + tableCellElement.getTagName(), + tableCellElement.getTagName(), + tableCellStyle.toString() ) ); tableRowElement.appendChild( tableCellElement ); } if ( tableRowStyle.length() > 0 ) - tableRowElement - .setAttribute( "style", tableRowStyle.toString() ); + tableRowElement.setAttribute( "class", htmlDocumentFacade + .getOrCreateCssClass( "tr", "r", + tableRowStyle.toString() ) ); if ( tableRow.isTableHeader() ) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java index 59a11753d0..b6a8c492cb 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java @@ -26,7 +26,7 @@ public class WordToHtmlUtils extends AbstractWordUtils { public static void addBold( final boolean bold, StringBuilder style ) { - style.append( "font-weight: " + ( bold ? "bold" : "normal" ) + ";" ); + style.append( "font-weight:" + ( bold ? "bold" : "normal" ) + ";" ); } public static void addBorder( BorderCode borderCode, String where, @@ -37,21 +37,21 @@ public class WordToHtmlUtils extends AbstractWordUtils if ( isEmpty( where ) ) { - style.append( "border-style: " + getBorderType( borderCode ) + "; " ); - style.append( "border-color: " + getColor( borderCode.getColor() ) - + "; " ); - style.append( "border-width: " + getBorderWidth( borderCode ) - + "; " ); + style.append( "border:" ); } else { - style.append( "border-" + where + "-style: " - + getBorderType( borderCode ) + "; " ); - style.append( "border-" + where + "-color: " - + getColor( borderCode.getColor() ) + "; " ); - style.append( "border-" + where + "-width: " - + getBorderWidth( borderCode ) + "; " ); + style.append( "border-" ); + style.append( where ); } + + style.append( ":" ); + style.append( getBorderWidth( borderCode ) ); + style.append( ' ' ); + style.append( getBorderType( borderCode ) ); + style.append( ' ' ); + style.append( getColor( borderCode.getColor() ) ); + style.append( ';' ); } public static void addCharactersProperties( @@ -61,43 +61,43 @@ public class WordToHtmlUtils extends AbstractWordUtils if ( characterRun.isCapitalized() ) { - style.append( "text-transform: uppercase; " ); + style.append( "text-transform:uppercase;" ); } if ( characterRun.isHighlighted() ) { - style.append( "background-color: " - + getColor( characterRun.getHighlightedColor() ) + "; " ); + style.append( "background-color:" + + getColor( characterRun.getHighlightedColor() ) + ";" ); } if ( characterRun.isStrikeThrough() ) { - style.append( "text-decoration: line-through; " ); + style.append( "text-decoration:line-through;" ); } if ( characterRun.isShadowed() ) { - style.append( "text-shadow: " + characterRun.getFontSize() / 24 - + "pt; " ); + style.append( "text-shadow:" + characterRun.getFontSize() / 24 + + "pt;" ); } if ( characterRun.isSmallCaps() ) { - style.append( "font-variant: small-caps; " ); + style.append( "font-variant:small-caps;" ); } if ( characterRun.getSubSuperScriptIndex() == 1 ) { - style.append( "baseline-shift: super; " ); - style.append( "font-size: smaller; " ); + style.append( "baseline-shift:super;" ); + style.append( "font-size:smaller;" ); } if ( characterRun.getSubSuperScriptIndex() == 2 ) { - style.append( "baseline-shift: sub; " ); - style.append( "font-size: smaller; " ); + style.append( "baseline-shift:sub;" ); + style.append( "font-size:smaller;" ); } if ( characterRun.getUnderlineCode() > 0 ) { - style.append( "text-decoration: underline; " ); + style.append( "text-decoration:underline;" ); } if ( characterRun.isVanished() ) { - style.append( "visibility: hidden; " ); + style.append( "visibility:hidden;" ); } } @@ -107,12 +107,12 @@ public class WordToHtmlUtils extends AbstractWordUtils if ( isEmpty( fontFamily ) ) return; - style.append( "font-family: " + fontFamily + "; " ); + style.append( "font-family:" + fontFamily + ";" ); } public static void addFontSize( final int fontSize, StringBuilder style ) { - style.append( "font-size: " + fontSize + "pt; " ); + style.append( "font-size:" + fontSize + "pt;" ); } public static void addIndent( Paragraph paragraph, StringBuilder style ) @@ -130,7 +130,7 @@ public class WordToHtmlUtils extends AbstractWordUtils if ( twipsValue == 0 ) return; - style.append( cssName + ": " + ( twipsValue / TWIPS_PER_PT ) + "pt; " ); + style.append( cssName + ":" + ( twipsValue / TWIPS_PER_PT ) + "pt;" ); } public static void addJustification( Paragraph paragraph, @@ -138,7 +138,7 @@ public class WordToHtmlUtils extends AbstractWordUtils { String justification = getJustification( paragraph.getJustification() ); if ( isNotEmpty( justification ) ) - style.append( "text-align: " + justification + "; " ); + style.append( "text-align:" + justification + ";" ); } public static void addParagraphProperties( Paragraph paragraph, @@ -154,20 +154,20 @@ public class WordToHtmlUtils extends AbstractWordUtils if ( paragraph.pageBreakBefore() ) { - style.append( "break-before: page; " ); + style.append( "break-before:page;" ); } - style.append( "hyphenate: " - + ( paragraph.isAutoHyphenated() ? "auto" : "none" ) + "; " ); + style.append( "hyphenate:" + + ( paragraph.isAutoHyphenated() ? "auto" : "none" ) + ";" ); if ( paragraph.keepOnPage() ) { - style.append( "keep-together.within-page: always; " ); + style.append( "keep-together.within-page:always;" ); } if ( paragraph.keepWithNext() ) { - style.append( "keep-with-next.within-page: always; " ); + style.append( "keep-with-next.within-page:always;" ); } } @@ -175,12 +175,12 @@ public class WordToHtmlUtils extends AbstractWordUtils TableCell tableCell, boolean toppest, boolean bottomest, boolean leftest, boolean rightest, StringBuilder style ) { - style.append( "width: " + ( tableCell.getWidth() / TWIPS_PER_INCH ) - + "in; " ); - style.append( "padding-start: " - + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " ); - style.append( "padding-end: " - + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " ); + style.append( "width:" + ( tableCell.getWidth() / TWIPS_PER_INCH ) + + "in;" ); + style.append( "padding-start:" + + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in;" ); + style.append( "padding-end:" + + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in;" ); BorderCode top = tableCell.getBrcTop() != null && tableCell.getBrcTop().getBorderType() != 0 ? tableCell @@ -211,12 +211,12 @@ public class WordToHtmlUtils extends AbstractWordUtils { if ( tableRow.getRowHeight() > 0 ) { - style.append( "height: " - + ( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in; " ); + style.append( "height:" + + ( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in;" ); } if ( !tableRow.cantSplit() ) { - style.append( "keep-together: always; " ); + style.append( "keep-together:always;" ); } } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java index aa50fe2e52..d71c1a821d 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java @@ -92,7 +92,7 @@ public class TestWordToHtmlConverter extends TestCase public void testAIOOBTap() throws Exception { String result = getHtmlText( "AIOOB-Tap.doc" ); - assertContains( result.substring( 0, 2000 ), "" ); + assertContains( result.substring( 0, 6000 ), "
" ); } public void testBug33519() throws Exception