From 4fe8a5ab89f6727515086ca8292edc03a73d53b8 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Wed, 20 Jul 2011 16:01:19 +0000 Subject: [PATCH] add bookmarks support for Word-to-HTML and Word-to-FO converters git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148824 13f79535-47bb-0310-9956-ffa450edef68 --- .../hwpf/converter/AbstractWordConverter.java | 177 +++++++++++++++++- .../hwpf/converter/HtmlDocumentFacade.java | 7 + .../poi/hwpf/converter/WordToFoConverter.java | 64 ++++--- .../hwpf/converter/WordToHtmlConverter.java | 20 ++ .../apache/poi/hwpf/usermodel/Bookmark.java | 29 ++- .../poi/hwpf/usermodel/BookmarksImpl.java | 133 ++++++++----- .../converter/TestWordToHtmlConverter.java | 1 + 7 files changed, 351 insertions(+), 80 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java index 61daedff82..d0ba6211f0 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java @@ -16,6 +16,12 @@ ==================================================================== */ package org.apache.poi.hwpf.converter; +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -23,10 +29,11 @@ import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.converter.FontReplacer.Triplet; -import org.apache.poi.hwpf.model.FieldsDocumentPart; import org.apache.poi.hwpf.model.Field; +import org.apache.poi.hwpf.model.FieldsDocumentPart; import org.apache.poi.hwpf.model.ListFormatOverride; import org.apache.poi.hwpf.model.ListTables; +import org.apache.poi.hwpf.usermodel.Bookmark; import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Picture; @@ -51,6 +58,8 @@ public abstract class AbstractWordConverter private static final POILogger logger = POILogFactory .getLogger( AbstractWordConverter.class ); + private final Set bookmarkStack = new LinkedHashSet(); + private FontReplacer fontReplacer = new DefaultFontReplacer(); protected Triplet getCharacterRunTriplet( CharacterRun characterRun ) @@ -73,7 +82,16 @@ public abstract class AbstractWordConverter protected abstract void outputCharacters( Element block, CharacterRun characterRun, String text ); - protected boolean processCharacters( HWPFDocumentCore hwpfDocument, + /** + * Wrap range into bookmark(s) and process it. All bookmarks have starts + * equal to range start and ends equal to range end. Usually it's only one + * bookmark. + */ + protected abstract void processBookmarks( HWPFDocumentCore wordDocument, + Element currentBlock, Range range, int currentTableLevel, + List rangeBookmarks ); + + protected boolean processCharacters( HWPFDocumentCore document, int currentTableLevel, Range range, final Element block ) { if ( range == null ) @@ -81,6 +99,22 @@ public abstract class AbstractWordConverter boolean haveAnyText = false; + if ( document instanceof HWPFDocument ) + { + final HWPFDocument doc = (HWPFDocument) document; + Map> rangeBookmarks = doc.getBookmarks() + .getBookmarksStartedBetween( range.getStartOffset(), + range.getEndOffset() ); + + if ( rangeBookmarks != null && !rangeBookmarks.isEmpty() ) + { + boolean processedAny = processRangeBookmarks( doc, + currentTableLevel, range, block, rangeBookmarks ); + if ( processedAny ) + return true; + } + } + for ( int c = 0; c < range.numCharacterRuns(); c++ ) { CharacterRun characterRun = range.getCharacterRun( c ); @@ -88,11 +122,11 @@ public abstract class AbstractWordConverter if ( characterRun == null ) throw new AssertionError(); - if ( hwpfDocument instanceof HWPFDocument - && ( (HWPFDocument) hwpfDocument ).getPicturesTable() + if ( document instanceof HWPFDocument + && ( (HWPFDocument) document ).getPicturesTable() .hasPicture( characterRun ) ) { - HWPFDocument newFormat = (HWPFDocument) hwpfDocument; + HWPFDocument newFormat = (HWPFDocument) document; Picture picture = newFormat.getPicturesTable().extractPicture( characterRun, true ); @@ -107,15 +141,15 @@ public abstract class AbstractWordConverter if ( text.getBytes()[0] == FIELD_BEGIN_MARK ) { - if ( hwpfDocument instanceof HWPFDocument ) + if ( document instanceof HWPFDocument ) { - Field aliveField = ( (HWPFDocument) hwpfDocument ) + Field aliveField = ( (HWPFDocument) document ) .getFieldsTables().lookupFieldByStartOffset( FieldsDocumentPart.MAIN, characterRun.getStartOffset() ); if ( aliveField != null ) { - processField( ( (HWPFDocument) hwpfDocument ), range, + processField( ( (HWPFDocument) document ), range, currentTableLevel, aliveField, block ); int continueAfter = aliveField.getFieldEndOffset(); @@ -130,8 +164,8 @@ public abstract class AbstractWordConverter } } - int skipTo = tryDeadField( hwpfDocument, range, - currentTableLevel, c, block ); + int skipTo = tryDeadField( document, range, currentTableLevel, + c, block ); if ( skipTo != c ) { @@ -337,6 +371,129 @@ public abstract class AbstractWordConverter Element parentFopElement, int currentTableLevel, Paragraph paragraph, String bulletText ); + private boolean processRangeBookmarks( HWPFDocumentCore document, + int currentTableLevel, Range range, final Element block, + Map> rangeBookmakrs ) + { + final int startOffset = range.getStartOffset(); + final int endOffset = range.getEndOffset(); + + int beforeBookmarkStart = startOffset; + for ( Map.Entry> entry : rangeBookmakrs + .entrySet() ) + { + final List startedAt = entry.getValue(); + + final List bookmarks; + if ( entry.getKey().intValue() == startOffset + && !bookmarkStack.isEmpty() ) + { + /* + * we need to filter out some bookmarks because already + * processing them in caller methods + */ + List filtered = new ArrayList( + startedAt.size() ); + for ( Bookmark bookmark : startedAt ) + { + if ( this.bookmarkStack.contains( bookmark ) ) + continue; + + filtered.add( bookmark ); + } + + if ( filtered.isEmpty() ) + // no bookmarks - skip to next start point + continue; + + bookmarks = filtered; + } + else + { + bookmarks = startedAt; + } + + // TODO: test me + /* + * we processing only bookmarks with max size, they shall be first + * in sorted list. Other bookmarks will be processed by called + * method + */ + final Bookmark firstBookmark = bookmarks.iterator().next(); + final int startBookmarkOffset = firstBookmark.getStart(); + final int endBookmarkOffset = Math.min( firstBookmark.getEnd(), + range.getEndOffset() ); + List toProcess = new ArrayList( + bookmarks.size() ); + for ( Bookmark bookmark : bookmarks ) + { + if ( Math.min( bookmark.getEnd(), range.getEndOffset() ) != endBookmarkOffset ) + break; + toProcess.add( bookmark ); + } + + if ( beforeBookmarkStart != startBookmarkOffset ) + { + // we have range before bookmark + Range beforeBookmarkRange = new Range( beforeBookmarkStart, + startBookmarkOffset, range ) + { + @Override + public String toString() + { + return "BeforeBookmarkRange (" + super.toString() + ")"; + } + }; + processCharacters( document, currentTableLevel, + beforeBookmarkRange, block ); + } + Range bookmarkRange = new Range( startBookmarkOffset, + endBookmarkOffset, range ) + { + @Override + public String toString() + { + return "BookmarkRange (" + super.toString() + ")"; + } + }; + + bookmarkStack.addAll( toProcess ); + try + { + processBookmarks( document, block, bookmarkRange, + currentTableLevel, + Collections.unmodifiableList( toProcess ) ); + } + finally + { + bookmarkStack.removeAll( toProcess ); + } + beforeBookmarkStart = endBookmarkOffset; + } + + if ( beforeBookmarkStart == startOffset ) + { + return false; + } + + if ( beforeBookmarkStart != endOffset ) + { + // we have range after last bookmark + Range afterLastBookmarkRange = new Range( beforeBookmarkStart, + endOffset, range ) + { + @Override + public String toString() + { + return "AfterBookmarkRange (" + super.toString() + ")"; + } + }; + processCharacters( document, currentTableLevel, + afterLastBookmarkRange, block ); + } + return true; + } + protected abstract void processSection( HWPFDocumentCore wordDocument, Section section, int s ); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java index 6cd6227a1a..a6e38073ce 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/HtmlDocumentFacade.java @@ -89,6 +89,13 @@ public class HtmlDocumentFacade return document.createElement( "div" ); } + public Element createBookmark( String name ) + { + final Element basicLink = document.createElement( "a" ); + basicLink.setAttribute( "name", name ); + return basicLink; + } + public Element createHeader1() { return document.createElement( "h1" ); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java index 939f749dc8..bde7a8c6ad 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java @@ -18,6 +18,7 @@ package org.apache.poi.hwpf.converter; import java.io.File; import java.io.FileWriter; +import java.util.List; import java.util.Stack; import javax.xml.parsers.DocumentBuilderFactory; @@ -31,6 +32,7 @@ import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.converter.FontReplacer.Triplet; +import org.apache.poi.hwpf.usermodel.Bookmark; import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Picture; @@ -51,28 +53,6 @@ import org.w3c.dom.Text; public class WordToFoConverter extends AbstractWordConverter { - /** - * Holds properties values, applied to current fo:block element. - * Those properties shall not be doubled in children fo:inline - * elements. - */ - private static class BlockProperies - { - final boolean pBold; - final String pFontName; - final int pFontSize; - final boolean pItalic; - - public BlockProperies( String pFontName, int pFontSize, boolean pBold, - boolean pItalic ) - { - this.pFontName = pFontName; - this.pFontSize = pFontSize; - this.pBold = pBold; - this.pItalic = pItalic; - } - } - private static final POILogger logger = POILogFactory .getLogger( WordToFoConverter.class ); @@ -237,6 +217,24 @@ public class WordToFoConverter extends AbstractWordConverter inline.appendChild( textNode ); } + @Override + protected void processBookmarks( HWPFDocumentCore wordDocument, + Element currentBlock, Range range, int currentTableLevel, + List rangeBookmarks ) + { + Element parent = currentBlock; + for ( Bookmark bookmark : rangeBookmarks ) + { + Element bookmarkElement = foDocumentFacade.createInline(); + bookmarkElement.setAttribute( "id", bookmark.getName() ); + parent.appendChild( bookmarkElement ); + parent = bookmarkElement; + } + + if ( range != null ) + processCharacters( wordDocument, currentTableLevel, range, parent ); + } + @Override protected void processDocumentInformation( SummaryInformation summaryInformation ) @@ -509,4 +507,26 @@ public class WordToFoConverter extends AbstractWordConverter } } + /** + * Holds properties values, applied to current fo:block element. + * Those properties shall not be doubled in children fo:inline + * elements. + */ + private static class BlockProperies + { + final boolean pBold; + final String pFontName; + final int pFontSize; + final boolean pItalic; + + public BlockProperies( String pFontName, int pFontSize, boolean pBold, + boolean pItalic ) + { + this.pFontName = pFontName; + this.pFontSize = pFontSize; + this.pBold = pBold; + this.pItalic = pItalic; + } + } + } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java index f81be74c82..71eddf12ab 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java @@ -18,6 +18,7 @@ package org.apache.poi.hwpf.converter; import java.io.File; import java.io.FileWriter; +import java.util.List; import java.util.Stack; import javax.xml.parsers.DocumentBuilderFactory; @@ -31,6 +32,7 @@ import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.converter.FontReplacer.Triplet; +import org.apache.poi.hwpf.usermodel.Bookmark; import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Picture; @@ -234,6 +236,24 @@ public class WordToHtmlConverter extends AbstractWordConverter basicLink ); } + @Override + protected void processBookmarks( HWPFDocumentCore wordDocument, + Element currentBlock, Range range, int currentTableLevel, + List rangeBookmarks ) + { + Element parent = currentBlock; + for ( Bookmark bookmark : rangeBookmarks ) + { + Element bookmarkElement = htmlDocumentFacade + .createBookmark( bookmark.getName() ); + parent.appendChild( bookmarkElement ); + parent = bookmarkElement; + } + + if ( range != null ) + processCharacters( wordDocument, currentTableLevel, range, parent ); + } + /** * This method shall store image bytes in external file and convert it if * necessary. Images shall be stored using PNG format. Other formats may be diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Bookmark.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Bookmark.java index 9dfd6b6908..1836d8906c 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Bookmark.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Bookmark.java @@ -1,12 +1,33 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ package org.apache.poi.hwpf.usermodel; +/** + * User friendly interface to access information about document bookmarks + * + * @author Sergey Vladimirov (vlsergey {at} gmail {doc} com) + */ public interface Bookmark { - public int getEnd(); + int getEnd(); - public String getName(); + String getName(); - public int getStart(); + int getStart(); - public void setName( String name ); + void setName( String name ); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/BookmarksImpl.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/BookmarksImpl.java index 40b40644b3..9bc5e77f3b 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/BookmarksImpl.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/BookmarksImpl.java @@ -50,50 +50,7 @@ public class BookmarksImpl implements Bookmarks private Bookmark getBookmark( final GenericPropertyNode first ) { - return new Bookmark() - { - public int getEnd() - { - int currentIndex = bookmarksTables - .getDescriptorFirstIndex( first ); - try - { - GenericPropertyNode descriptorLim = bookmarksTables - .getDescriptorLim( currentIndex ); - return descriptorLim.getStart(); - } - catch ( IndexOutOfBoundsException exc ) - { - return first.getEnd(); - } - } - - public String getName() - { - int currentIndex = bookmarksTables - .getDescriptorFirstIndex( first ); - try - { - return bookmarksTables.getName( currentIndex ); - } - catch ( ArrayIndexOutOfBoundsException exc ) - { - return ""; - } - } - - public int getStart() - { - return first.getStart(); - } - - public void setName( String name ) - { - int currentIndex = bookmarksTables - .getDescriptorFirstIndex( first ); - bookmarksTables.setName( currentIndex, name ); - } - }; + return new BookmarkImpl( first ); } public Bookmark getBookmark( int index ) @@ -143,6 +100,11 @@ public class BookmarksImpl implements Bookmarks for ( int lookupIndex = startLookupIndex; lookupIndex < endLookupIndex; lookupIndex++ ) { int s = sortedStartPositions[lookupIndex]; + if ( s < startInclusive ) + continue; + if ( s >= endExclusive ) + break; + List startedAt = getBookmarksAt( s ); if ( startedAt != null ) result.put( Integer.valueOf( s ), startedAt ); @@ -186,4 +148,87 @@ public class BookmarksImpl implements Bookmarks this.sortedDescriptors = result; this.sortedStartPositions = indices; } + + private final class BookmarkImpl implements Bookmark + { + private final GenericPropertyNode first; + + private BookmarkImpl( GenericPropertyNode first ) + { + this.first = first; + } + + @Override + public boolean equals( Object obj ) + { + if ( this == obj ) + return true; + if ( obj == null ) + return false; + if ( getClass() != obj.getClass() ) + return false; + BookmarkImpl other = (BookmarkImpl) obj; + if ( first == null ) + { + if ( other.first != null ) + return false; + } + else if ( !first.equals( other.first ) ) + return false; + return true; + } + + public int getEnd() + { + int currentIndex = bookmarksTables.getDescriptorFirstIndex( first ); + try + { + GenericPropertyNode descriptorLim = bookmarksTables + .getDescriptorLim( currentIndex ); + return descriptorLim.getStart(); + } + catch ( IndexOutOfBoundsException exc ) + { + return first.getEnd(); + } + } + + public String getName() + { + int currentIndex = bookmarksTables.getDescriptorFirstIndex( first ); + try + { + return bookmarksTables.getName( currentIndex ); + } + catch ( ArrayIndexOutOfBoundsException exc ) + { + return ""; + } + } + + public int getStart() + { + return first.getStart(); + } + + @Override + public int hashCode() + { + return 31 + ( first == null ? 0 : first.hashCode() ); + } + + public void setName( String name ) + { + int currentIndex = bookmarksTables.getDescriptorFirstIndex( first ); + bookmarksTables.setName( currentIndex, name ); + } + + @Override + public String toString() + { + return "Bookmark [" + getStart() + "; " + getEnd() + "): name: " + + getName(); + } + + } } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java index d71c1a821d..175be9058e 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java @@ -187,6 +187,7 @@ public class TestWordToHtmlConverter extends TestCase String result = getHtmlText( "pageref.doc" ); assertContains( result, "" ); + assertContains( result, "" ); assertContains( result, "1" ); } }