mirror of https://github.com/apache/poi.git
bug 51351: more progress with WordToFoExtractor: support for hyperlinks, common fields and code cleanup
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1137673 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
49448123e1
commit
21885a6fd5
|
@ -0,0 +1,206 @@
|
|||
/*
|
||||
* ====================================================================
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ====================================================================
|
||||
*/
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Text;
|
||||
|
||||
/**
 * Base class holding the XSL-FO DOM building primitives shared by
 * extractors that render into an XSL-FO document.
 * <p>
 * On construction an <tt>fo:root</tt> element is appended to the supplied
 * DOM document and an <tt>fo:layout-master-set</tt> is appended to that root.
 * The <tt>add*</tt> methods create an element and attach it to a parent; the
 * <tt>create*</tt> methods only create the element and leave attaching it to
 * the caller.
 */
public abstract class AbstractToFoExtractor
{

    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";

    /** Target DOM document that receives all created nodes. */
    protected final Document document;

    /** The <tt>fo:layout-master-set</tt> element, first child of {@link #root}. */
    protected final Element layoutMasterSet;

    /** The <tt>fo:root</tt> element appended to {@link #document}. */
    protected final Element root;

    /**
     * Creates the <tt>fo:root</tt> / <tt>fo:layout-master-set</tt> skeleton
     * inside the given document.
     *
     * @param document
     *            DOM document to build the XSL-FO tree in
     */
    public AbstractToFoExtractor( Document document )
    {
        this.document = document;

        root = createFoElement( "fo:root" );
        document.appendChild( root );

        layoutMasterSet = createFoElement( "fo:layout-master-set" );
        root.appendChild( layoutMasterSet );
    }

    /**
     * Single place where elements in the XSL-FO namespace are created.
     *
     * @param qualifiedName
     *            prefixed element name, e.g. <tt>fo:block</tt>
     * @return new, unattached element
     */
    private Element createFoElement( String qualifiedName )
    {
        return document.createElementNS( NS_XSLFO, qualifiedName );
    }

    /**
     * Appends a new <tt>fo:flow</tt> with the given <tt>flow-name</tt> to the
     * page sequence.
     *
     * @return the created flow element
     */
    protected Element addFlowToPageSequence( final Element pageSequence,
            String flowName )
    {
        final Element flow = createFoElement( "fo:flow" );
        flow.setAttribute( "flow-name", flowName );
        pageSequence.appendChild( flow );
        return flow;
    }

    /**
     * Appends a new <tt>fo:list-item</tt> to the given list block.
     *
     * @return the created list item
     */
    protected Element addListItem( Element listBlock )
    {
        final Element listItem = createListItem();
        listBlock.appendChild( listItem );
        return listItem;
    }

    /**
     * Appends a new <tt>fo:list-item-body</tt> to the given list item.
     *
     * @return the created list item body
     */
    protected Element addListItemBody( Element listItem )
    {
        final Element body = createListItemBody();
        listItem.appendChild( body );
        return body;
    }

    /**
     * Appends a new <tt>fo:list-item-label</tt> showing <tt>text</tt> to the
     * given list item.
     *
     * @return the created list item label
     */
    protected Element addListItemLabel( Element listItem, String text )
    {
        final Element label = createListItemLabel( text );
        listItem.appendChild( label );
        return label;
    }

    /**
     * Appends a new <tt>fo:page-sequence</tt> to the root, referencing the
     * named page master through <tt>master-reference</tt>.
     *
     * @return the created page sequence
     */
    protected Element addPageSequence( String pageMaster )
    {
        final Element pageSequence = createFoElement( "fo:page-sequence" );
        pageSequence.setAttribute( "master-reference", pageMaster );
        root.appendChild( pageSequence );
        return pageSequence;
    }

    /**
     * Appends a new <tt>fo:region-body</tt> to the given page master.
     *
     * @return the created region body
     */
    protected Element addRegionBody( Element pageMaster )
    {
        final Element regionBody = createFoElement( "fo:region-body" );
        pageMaster.appendChild( regionBody );
        return regionBody;
    }

    /**
     * Appends a new <tt>fo:simple-page-master</tt> with the given
     * <tt>master-name</tt> to the layout master set.
     *
     * @return the created page master
     */
    protected Element addSimplePageMaster( String masterName )
    {
        final Element simplePageMaster = createFoElement( "fo:simple-page-master" );
        simplePageMaster.setAttribute( "master-name", masterName );
        layoutMasterSet.appendChild( simplePageMaster );
        return simplePageMaster;
    }

    /**
     * Appends a new <tt>fo:table</tt> to the given flow.
     *
     * @return the created table
     */
    protected Element addTable( Element flow )
    {
        final Element table = createFoElement( "fo:table" );
        flow.appendChild( table );
        return table;
    }

    /**
     * Creates an unattached <tt>fo:basic-link</tt> whose
     * <tt>external-destination</tt> attribute is the given value.
     */
    protected Element createBasicLinkExternal( String externalDestination )
    {
        final Element basicLink = createFoElement( "fo:basic-link" );
        basicLink.setAttribute( "external-destination", externalDestination );
        return basicLink;
    }

    /**
     * Creates an unattached <tt>fo:basic-link</tt> whose
     * <tt>internal-destination</tt> attribute is the given value.
     */
    protected Element createBasicLinkInternal( String internalDestination )
    {
        final Element basicLink = createFoElement( "fo:basic-link" );
        basicLink.setAttribute( "internal-destination", internalDestination );
        return basicLink;
    }

    /** Creates an unattached <tt>fo:block</tt>. */
    protected Element createBlock()
    {
        return createFoElement( "fo:block" );
    }

    /**
     * Creates an unattached <tt>fo:external-graphic</tt> whose <tt>src</tt>
     * is <tt>url('source')</tt>. The source string is inserted as is, without
     * any escaping.
     */
    protected Element createExternalGraphic( String source )
    {
        final Element graphic = createFoElement( "fo:external-graphic" );
        graphic.setAttribute( "src", "url('" + source + "')" );
        return graphic;
    }

    /** Creates an unattached <tt>fo:inline</tt>. */
    protected Element createInline()
    {
        return createFoElement( "fo:inline" );
    }

    /** Creates an unattached <tt>fo:leader</tt>. */
    protected Element createLeader()
    {
        return createFoElement( "fo:leader" );
    }

    /** Creates an unattached <tt>fo:list-block</tt>. */
    protected Element createListBlock()
    {
        return createFoElement( "fo:list-block" );
    }

    /** Creates an unattached <tt>fo:list-item</tt>. */
    protected Element createListItem()
    {
        return createFoElement( "fo:list-item" );
    }

    /** Creates an unattached <tt>fo:list-item-body</tt>. */
    protected Element createListItemBody()
    {
        return createFoElement( "fo:list-item-body" );
    }

    /**
     * Creates an unattached <tt>fo:list-item-label</tt> that contains a single
     * <tt>fo:block</tt> holding <tt>text</tt>.
     */
    protected Element createListItemLabel( String text )
    {
        final Element label = createFoElement( "fo:list-item-label" );
        final Element block = createBlock();
        block.appendChild( document.createTextNode( text ) );
        label.appendChild( block );
        return label;
    }

    /** Creates an unattached <tt>fo:table-body</tt>. */
    protected Element createTableBody()
    {
        return createFoElement( "fo:table-body" );
    }

    /** Creates an unattached <tt>fo:table-cell</tt>. */
    protected Element createTableCell()
    {
        return createFoElement( "fo:table-cell" );
    }

    /** Creates an unattached <tt>fo:table-header</tt>. */
    protected Element createTableHeader()
    {
        return createFoElement( "fo:table-header" );
    }

    /** Creates an unattached <tt>fo:table-row</tt>. */
    protected Element createTableRow()
    {
        return createFoElement( "fo:table-row" );
    }

    /** Creates an unattached DOM text node with the given content. */
    protected Text createText( String data )
    {
        return document.createTextNode( data );
    }

    /**
     * @return the DOM document passed to the constructor, now holding the
     *         XSL-FO tree
     */
    public Document getDocument()
    {
        return document;
    }

}
|
|
@ -16,7 +16,6 @@
|
|||
* limitations under the License.
|
||||
* ====================================================================
|
||||
*/
|
||||
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
|
@ -25,6 +24,9 @@ import java.io.FileWriter;
|
|||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Stack;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.transform.OutputKeys;
|
||||
|
@ -46,6 +48,8 @@ import org.apache.poi.hwpf.usermodel.Table;
|
|||
import org.apache.poi.hwpf.usermodel.TableCell;
|
||||
import org.apache.poi.hwpf.usermodel.TableIterator;
|
||||
import org.apache.poi.hwpf.usermodel.TableRow;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Text;
|
||||
|
@ -55,7 +59,30 @@ import static org.apache.poi.hwpf.extractor.WordToFoUtils.TWIPS_PER_INCH;
|
|||
/**
|
||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
|
||||
*/
|
||||
public class WordToFoExtractor {
|
||||
public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
{
|
||||
|
||||
/**
|
||||
* Holds properties values, applied to current <tt>fo:block</tt> element.
|
||||
* Those properties shall not be doubled in children <tt>fo:inline</tt>
|
||||
* elements.
|
||||
*/
|
||||
private static class BlockProperies
|
||||
{
|
||||
final boolean pBold;
|
||||
final String pFontName;
|
||||
final int pFontSize;
|
||||
final boolean pItalic;
|
||||
|
||||
public BlockProperies( String pFontName, int pFontSize, boolean pBold,
|
||||
boolean pItalic )
|
||||
{
|
||||
this.pFontName = pFontName;
|
||||
this.pFontSize = pFontSize;
|
||||
this.pBold = pBold;
|
||||
this.pItalic = pItalic;
|
||||
}
|
||||
}
|
||||
|
||||
private static final byte BEL_MARK = 7;
|
||||
|
||||
|
@ -65,22 +92,74 @@ public class WordToFoExtractor {
|
|||
|
||||
private static final byte FIELD_SEPARATOR_MARK = 20;
|
||||
|
||||
private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( WordToFoExtractor.class );
|
||||
|
||||
private static HWPFDocument loadDoc(File docFile) throws IOException {
|
||||
private static HWPFDocument loadDoc( File docFile ) throws IOException
|
||||
{
|
||||
final FileInputStream istream = new FileInputStream( docFile );
|
||||
try {
|
||||
try
|
||||
{
|
||||
return new HWPFDocument( istream );
|
||||
} finally {
|
||||
try {
|
||||
}
|
||||
finally
|
||||
{
|
||||
try
|
||||
{
|
||||
istream.close();
|
||||
} catch (Exception exc) {
|
||||
// no op
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
logger.log( POILogger.ERROR,
|
||||
"Unable to close FileInputStream: " + exc, exc );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static Document process(File docFile) throws Exception {
|
||||
/**
|
||||
* Java main() interface to interact with WordToFoExtractor
|
||||
*
|
||||
* <p>
|
||||
* Usage: WordToFoExtractor infile outfile
|
||||
* </p>
|
||||
* Where infile is an input .doc file ( Word 97-2007) which will be rendered
|
||||
* as XSL-FO into outfile
|
||||
*
|
||||
*/
|
||||
public static void main( String[] args )
|
||||
{
|
||||
if ( args.length < 2 )
|
||||
{
|
||||
System.err
|
||||
.println( "Usage: WordToFoExtractor <inputFile.doc> <saveTo.fo>" );
|
||||
return;
|
||||
}
|
||||
|
||||
System.out.println( "Converting " + args[0] );
|
||||
System.out.println( "Saving output to " + args[1] );
|
||||
try
|
||||
{
|
||||
Document doc = WordToFoExtractor.process( new File( args[0] ) );
|
||||
|
||||
FileWriter out = new FileWriter( args[1] );
|
||||
DOMSource domSource = new DOMSource( doc );
|
||||
StreamResult streamResult = new StreamResult( out );
|
||||
TransformerFactory tf = TransformerFactory.newInstance();
|
||||
Transformer serializer = tf.newTransformer();
|
||||
// TODO set encoding from a command argument
|
||||
serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
|
||||
serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
|
||||
serializer.transform( domSource, streamResult );
|
||||
out.close();
|
||||
}
|
||||
catch ( Exception e )
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
static Document process( File docFile ) throws Exception
|
||||
{
|
||||
final HWPFDocument hwpfDocument = loadDoc( docFile );
|
||||
WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
|
||||
DocumentBuilderFactory.newInstance().newDocumentBuilder()
|
||||
|
@ -89,123 +168,24 @@ public class WordToFoExtractor {
|
|||
return wordToFoExtractor.getDocument();
|
||||
}
|
||||
|
||||
/**
 * Properties of the <tt>fo:block</tt> elements currently being filled,
 * innermost on top.
 */
private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();

/**
 * Creates new instance of {@link WordToFoExtractor}. Can be used to output
 * several {@link HWPFDocument}s into a single FO document.
 * <p>
 * The FO root, the layout master set and the element factory methods are
 * provided by the superclass.
 *
 * @param document
 *            XML DOM Document used as XSL FO document. Shall support
 *            namespaces
 */
public WordToFoExtractor( Document document )
{
    super( document );
}
|
||||
|
||||
protected String createPageMaster( SectionProperties sep, String type,
|
||||
int section) {
|
||||
int section )
|
||||
{
|
||||
float height = sep.getYaPage() / TWIPS_PER_INCH;
|
||||
float width = sep.getXaPage() / TWIPS_PER_INCH;
|
||||
float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
|
||||
|
@ -234,12 +214,17 @@ public class WordToFoExtractor {
|
|||
// WordToFoUtils.setBorder(regionBody, sep.getBrcLeft(), "left");
|
||||
// WordToFoUtils.setBorder(regionBody, sep.getBrcRight(), "right");
|
||||
|
||||
if (sep.getCcolM1() > 0) {
|
||||
regionBody.setAttribute("column-count", "" + (sep.getCcolM1() + 1));
|
||||
if (sep.getFEvenlySpaced()) {
|
||||
if ( sep.getCcolM1() > 0 )
|
||||
{
|
||||
regionBody
|
||||
.setAttribute( "column-count", "" + (sep.getCcolM1() + 1) );
|
||||
if ( sep.getFEvenlySpaced() )
|
||||
{
|
||||
regionBody.setAttribute( "column-gap",
|
||||
(sep.getDxaColumns() / TWIPS_PER_INCH) + "in" );
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
regionBody.setAttribute( "column-gap", "0.25in" );
|
||||
}
|
||||
}
|
||||
|
@ -247,38 +232,173 @@ public class WordToFoExtractor {
|
|||
return pageMasterName;
|
||||
}
|
||||
|
||||
protected Element createTableBody() {
|
||||
return document.createElementNS(NS_XSLFO, "fo:table-body");
|
||||
protected boolean processCharacters( HWPFDocument hwpfDocument,
|
||||
int currentTableLevel, Paragraph paragraph, final Element block,
|
||||
final int start, final int end )
|
||||
{
|
||||
boolean haveAnyText = false;
|
||||
|
||||
for ( int c = start; c < end; c++ )
|
||||
{
|
||||
CharacterRun characterRun = paragraph.getCharacterRun( c );
|
||||
|
||||
if ( hwpfDocument.getPicturesTable().hasPicture( characterRun ) )
|
||||
{
|
||||
Picture picture = hwpfDocument.getPicturesTable()
|
||||
.extractPicture( characterRun, true );
|
||||
|
||||
processImage( block, characterRun.text().charAt( 0 ) == 0x01,
|
||||
picture );
|
||||
continue;
|
||||
}
|
||||
|
||||
protected Element createTableCell() {
|
||||
return document.createElementNS(NS_XSLFO, "fo:table-cell");
|
||||
String text = characterRun.text();
|
||||
if ( text.getBytes().length == 0 )
|
||||
continue;
|
||||
|
||||
if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
|
||||
{
|
||||
int skipTo = tryField( hwpfDocument, paragraph,
|
||||
currentTableLevel, c, block );
|
||||
|
||||
if ( skipTo != c )
|
||||
{
|
||||
c = skipTo;
|
||||
continue;
|
||||
}
|
||||
|
||||
protected Element createTableHeader() {
|
||||
return document.createElementNS(NS_XSLFO, "fo:table-header");
|
||||
continue;
|
||||
}
|
||||
if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
|
||||
{
|
||||
// shall not appear without FIELD_BEGIN_MARK
|
||||
continue;
|
||||
}
|
||||
if ( text.getBytes()[0] == FIELD_END_MARK )
|
||||
{
|
||||
// shall not appear without FIELD_BEGIN_MARK
|
||||
continue;
|
||||
}
|
||||
|
||||
protected Element createTableRow() {
|
||||
return document.createElementNS(NS_XSLFO, "fo:table-row");
|
||||
if ( characterRun.isSpecialCharacter() || characterRun.isObj()
|
||||
|| characterRun.isOle2() )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
protected Text createText(String data) {
|
||||
return document.createTextNode(data);
|
||||
BlockProperies blockProperies = this.blocksProperies.peek();
|
||||
Element inline = createInline();
|
||||
if ( characterRun.isBold() != blockProperies.pBold )
|
||||
{
|
||||
WordToFoUtils.setBold( inline, characterRun.isBold() );
|
||||
}
|
||||
if ( characterRun.isItalic() != blockProperies.pItalic )
|
||||
{
|
||||
WordToFoUtils.setItalic( inline, characterRun.isItalic() );
|
||||
}
|
||||
if ( !WordToFoUtils.equals( characterRun.getFontName(),
|
||||
blockProperies.pFontName ) )
|
||||
{
|
||||
WordToFoUtils
|
||||
.setFontFamily( inline, characterRun.getFontName() );
|
||||
}
|
||||
if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
|
||||
{
|
||||
WordToFoUtils.setFontSize( inline,
|
||||
characterRun.getFontSize() / 2 );
|
||||
}
|
||||
WordToFoUtils.setCharactersProperties( characterRun, inline );
|
||||
block.appendChild( inline );
|
||||
|
||||
if ( text.endsWith( "\r" )
|
||||
|| (text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0) )
|
||||
text = text.substring( 0, text.length() - 1 );
|
||||
|
||||
Text textNode = createText( text );
|
||||
inline.appendChild( textNode );
|
||||
|
||||
haveAnyText |= text.trim().length() != 0;
|
||||
}
|
||||
|
||||
public Document getDocument() {
|
||||
return document;
|
||||
return haveAnyText;
|
||||
}
|
||||
|
||||
public void processDocument(HWPFDocument hwpfDocument) {
|
||||
public void processDocument( HWPFDocument hwpfDocument )
|
||||
{
|
||||
final Range range = hwpfDocument.getRange();
|
||||
|
||||
for (int s = 0; s < range.numSections(); s++) {
|
||||
for ( int s = 0; s < range.numSections(); s++ )
|
||||
{
|
||||
processSection( hwpfDocument, range.getSection( s ), s );
|
||||
}
|
||||
}
|
||||
|
||||
protected void processField( HWPFDocument hwpfDocument,
|
||||
Element currentBlock, Paragraph paragraph, int currentTableLevel,
|
||||
int beginMark, int separatorMark, int endMark )
|
||||
{
|
||||
|
||||
Pattern hyperlinkPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
|
||||
Pattern pagerefPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
|
||||
|
||||
if ( separatorMark - beginMark > 1 )
|
||||
{
|
||||
CharacterRun firstAfterBegin = paragraph
|
||||
.getCharacterRun( beginMark + 1 );
|
||||
|
||||
final Matcher hyperlinkMatcher = hyperlinkPattern
|
||||
.matcher( firstAfterBegin.text() );
|
||||
if ( hyperlinkMatcher.matches() )
|
||||
{
|
||||
String hyperlink = hyperlinkMatcher.group( 1 );
|
||||
processHyperlink( hwpfDocument, currentBlock, paragraph,
|
||||
currentTableLevel, hyperlink, separatorMark + 1,
|
||||
endMark );
|
||||
return;
|
||||
}
|
||||
|
||||
final Matcher pagerefMatcher = pagerefPattern
|
||||
.matcher( firstAfterBegin.text() );
|
||||
if ( pagerefMatcher.matches() )
|
||||
{
|
||||
String pageref = pagerefMatcher.group( 1 );
|
||||
processPageref( hwpfDocument, currentBlock, paragraph,
|
||||
currentTableLevel, pageref, separatorMark + 1, endMark );
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
|
||||
for ( int i = beginMark; i <= endMark; i++ )
|
||||
{
|
||||
debug.append( "\t" );
|
||||
debug.append( paragraph.getCharacterRun( i ) );
|
||||
debug.append( "\n" );
|
||||
}
|
||||
logger.log( POILogger.WARN, debug );
|
||||
|
||||
// just output field value
|
||||
if ( separatorMark + 1 < endMark )
|
||||
processCharacters( hwpfDocument, currentTableLevel, paragraph,
|
||||
currentBlock, separatorMark + 1, endMark );
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
protected void processHyperlink( HWPFDocument hwpfDocument,
|
||||
Element currentBlock, Paragraph paragraph, int currentTableLevel,
|
||||
String hyperlink, int beginTextInclusive, int endTextExclusive )
|
||||
{
|
||||
Element basicLink = createBasicLinkExternal( hyperlink );
|
||||
currentBlock.appendChild( basicLink );
|
||||
|
||||
if ( beginTextInclusive < endTextExclusive )
|
||||
processCharacters( hwpfDocument, currentTableLevel, paragraph,
|
||||
basicLink, beginTextInclusive, endTextExclusive );
|
||||
}
|
||||
|
||||
/**
|
||||
* This method shall store image bytes in external file and convert it if
|
||||
* necessary. Images shall be stored using PNG format (for bitmap) or SVG
|
||||
|
@ -299,13 +419,29 @@ public class WordToFoExtractor {
|
|||
* HWPF object, contained picture data and properties
|
||||
*/
|
||||
protected void processImage( Element currentBlock, boolean inlined,
|
||||
Picture picture) {
|
||||
Picture picture )
|
||||
{
|
||||
// no default implementation -- skip
|
||||
currentBlock.appendChild( document.createComment( "Image link to '"
|
||||
+ picture.suggestFullFileName() + "' can be here" ) );
|
||||
}
|
||||
|
||||
protected void processPageref( HWPFDocument hwpfDocument,
|
||||
Element currentBlock, Paragraph paragraph, int currentTableLevel,
|
||||
String pageref, int beginTextInclusive, int endTextExclusive )
|
||||
{
|
||||
Element basicLink = createBasicLinkInternal( pageref );
|
||||
currentBlock.appendChild( basicLink );
|
||||
|
||||
if ( beginTextInclusive < endTextExclusive )
|
||||
processCharacters( hwpfDocument, currentTableLevel, paragraph,
|
||||
basicLink, beginTextInclusive, endTextExclusive );
|
||||
}
|
||||
|
||||
protected void processParagraph( HWPFDocument hwpfDocument,
|
||||
Element parentFopElement, int currentTableLevel,
|
||||
Paragraph paragraph, String bulletText) {
|
||||
Paragraph paragraph, String bulletText )
|
||||
{
|
||||
final Element block = createBlock();
|
||||
parentFopElement.appendChild( block );
|
||||
|
||||
|
@ -313,10 +449,12 @@ public class WordToFoExtractor {
|
|||
|
||||
final int charRuns = paragraph.numCharacterRuns();
|
||||
|
||||
if (charRuns == 0) {
|
||||
if ( charRuns == 0 )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
final String pFontName;
|
||||
final int pFontSize;
|
||||
final boolean pBold;
|
||||
|
@ -333,97 +471,44 @@ public class WordToFoExtractor {
|
|||
WordToFoUtils.setBold( block, pBold );
|
||||
WordToFoUtils.setItalic( block, pItalic );
|
||||
|
||||
StringBuilder lineText = new StringBuilder();
|
||||
blocksProperies.push( new BlockProperies( pFontName, pFontSize,
|
||||
pBold, pItalic ) );
|
||||
}
|
||||
try
|
||||
{
|
||||
boolean haveAnyText = false;
|
||||
|
||||
if (WordToFoUtils.isNotEmpty(bulletText)) {
|
||||
if ( WordToFoUtils.isNotEmpty( bulletText ) )
|
||||
{
|
||||
Element inline = createInline();
|
||||
block.appendChild( inline );
|
||||
|
||||
Text textNode = createText( bulletText );
|
||||
inline.appendChild( textNode );
|
||||
|
||||
lineText.append(bulletText);
|
||||
haveAnyText |= bulletText.trim().length() != 0;
|
||||
}
|
||||
|
||||
for (int c = 0; c < charRuns; c++) {
|
||||
CharacterRun characterRun = paragraph.getCharacterRun(c);
|
||||
haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
|
||||
paragraph, block, 0, charRuns );
|
||||
|
||||
if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
|
||||
Picture picture = hwpfDocument.getPicturesTable()
|
||||
.extractPicture(characterRun, true);
|
||||
|
||||
processImage(block, characterRun.text().charAt(0) == 0x01,
|
||||
picture);
|
||||
continue;
|
||||
}
|
||||
|
||||
String text = characterRun.text();
|
||||
if (text.getBytes().length == 0)
|
||||
continue;
|
||||
|
||||
if (text.getBytes()[0] == FIELD_BEGIN_MARK) {
|
||||
/*
|
||||
* check if we have a field with calculated image as a result.
|
||||
* MathType equation, for example.
|
||||
*/
|
||||
int skipTo = tryImageWithinField(hwpfDocument, paragraph, c,
|
||||
block);
|
||||
|
||||
if (skipTo != c) {
|
||||
c = skipTo;
|
||||
continue;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) {
|
||||
continue;
|
||||
}
|
||||
if (text.getBytes()[0] == FIELD_END_MARK) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (characterRun.isSpecialCharacter() || characterRun.isObj()
|
||||
|| characterRun.isOle2()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Element inline = createInline();
|
||||
if (characterRun.isBold() != pBold) {
|
||||
WordToFoUtils.setBold(inline, characterRun.isBold());
|
||||
}
|
||||
if (characterRun.isItalic() != pItalic) {
|
||||
WordToFoUtils.setItalic(inline, characterRun.isItalic());
|
||||
}
|
||||
if (!WordToFoUtils.equals(characterRun.getFontName(), pFontName)) {
|
||||
WordToFoUtils.setFontFamily(inline, characterRun.getFontName());
|
||||
}
|
||||
if (characterRun.getFontSize() / 2 != pFontSize) {
|
||||
WordToFoUtils.setFontSize(inline,
|
||||
characterRun.getFontSize() / 2);
|
||||
}
|
||||
WordToFoUtils.setCharactersProperties(characterRun, inline);
|
||||
block.appendChild(inline);
|
||||
|
||||
if (text.endsWith("\r")
|
||||
|| (text.charAt(text.length() - 1) == BEL_MARK && currentTableLevel != 0))
|
||||
text = text.substring(0, text.length() - 1);
|
||||
|
||||
Text textNode = createText(text);
|
||||
inline.appendChild(textNode);
|
||||
|
||||
lineText.append(text);
|
||||
}
|
||||
|
||||
if (lineText.toString().trim().length() == 0) {
|
||||
if ( !haveAnyText )
|
||||
{
|
||||
Element leader = createLeader();
|
||||
block.appendChild( leader );
|
||||
}
|
||||
}
|
||||
finally
|
||||
{
|
||||
blocksProperies.pop();
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
protected void processSection( HWPFDocument hwpfDocument, Section section,
|
||||
int sectionCounter) {
|
||||
int sectionCounter )
|
||||
{
|
||||
String regularPage = createPageMaster(
|
||||
WordToFoUtils.getSectionProperties( section ), "page",
|
||||
sectionCounter );
|
||||
|
@ -435,10 +520,12 @@ public class WordToFoExtractor {
|
|||
}
|
||||
|
||||
protected void processSectionParagraphes( HWPFDocument hwpfDocument,
|
||||
Element flow, Range range, int currentTableLevel) {
|
||||
Element flow, Range range, int currentTableLevel )
|
||||
{
|
||||
final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
|
||||
for ( TableIterator tableIterator = WordToFoUtils.newTableIterator(
|
||||
range, currentTableLevel + 1); tableIterator.hasNext();) {
|
||||
range, currentTableLevel + 1 ); tableIterator.hasNext(); )
|
||||
{
|
||||
Table next = tableIterator.next();
|
||||
allTables.put( Integer.valueOf( next.getStartOffset() ), next );
|
||||
}
|
||||
|
@ -447,11 +534,13 @@ public class WordToFoExtractor {
|
|||
int currentListInfo = 0;
|
||||
|
||||
final int paragraphs = range.numParagraphs();
|
||||
for (int p = 0; p < paragraphs; p++) {
|
||||
for ( int p = 0; p < paragraphs; p++ )
|
||||
{
|
||||
Paragraph paragraph = range.getParagraph( p );
|
||||
|
||||
if ( allTables.containsKey( Integer.valueOf( paragraph
|
||||
.getStartOffset()))) {
|
||||
.getStartOffset() ) ) )
|
||||
{
|
||||
Table table = allTables.get( Integer.valueOf( paragraph
|
||||
.getStartOffset() ) );
|
||||
processTable( hwpfDocument, flow, table, currentTableLevel + 1 );
|
||||
|
@ -459,15 +548,18 @@ public class WordToFoExtractor {
|
|||
}
|
||||
|
||||
if ( paragraph.isInTable()
|
||||
&& paragraph.getTableLevel() != currentTableLevel) {
|
||||
&& paragraph.getTableLevel() != currentTableLevel )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if (paragraph.getIlfo() != currentListInfo) {
|
||||
if ( paragraph.getIlfo() != currentListInfo )
|
||||
{
|
||||
currentListInfo = paragraph.getIlfo();
|
||||
}
|
||||
|
||||
if (currentListInfo != 0) {
|
||||
if ( currentListInfo != 0 )
|
||||
{
|
||||
final ListFormatOverride listFormatOverride = listTables
|
||||
.getOverride( paragraph.getIlfo() );
|
||||
|
||||
|
@ -476,7 +568,9 @@ public class WordToFoExtractor {
|
|||
|
||||
processParagraph( hwpfDocument, flow, currentTableLevel,
|
||||
paragraph, label );
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
processParagraph( hwpfDocument, flow, currentTableLevel,
|
||||
paragraph, WordToFoUtils.EMPTY );
|
||||
}
|
||||
|
@ -485,7 +579,8 @@ public class WordToFoExtractor {
|
|||
}
|
||||
|
||||
protected void processTable( HWPFDocument hwpfDocument, Element flow,
|
||||
Table table, int thisTableLevel) {
|
||||
Table table, int thisTableLevel )
|
||||
{
|
||||
Element tableElement = addTable( flow );
|
||||
|
||||
Element tableHeader = createTableHeader();
|
||||
|
@ -494,18 +589,21 @@ public class WordToFoExtractor {
|
|||
final int tableRows = table.numRows();
|
||||
|
||||
int maxColumns = Integer.MIN_VALUE;
|
||||
for (int r = 0; r < tableRows; r++) {
|
||||
for ( int r = 0; r < tableRows; r++ )
|
||||
{
|
||||
maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
|
||||
}
|
||||
|
||||
for (int r = 0; r < tableRows; r++) {
|
||||
for ( int r = 0; r < tableRows; r++ )
|
||||
{
|
||||
TableRow tableRow = table.getRow( r );
|
||||
|
||||
Element tableRowElement = createTableRow();
|
||||
WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
|
||||
|
||||
final int rowCells = tableRow.numCells();
|
||||
for (int c = 0; c < rowCells; c++) {
|
||||
for ( int c = 0; c < rowCells; c++ )
|
||||
{
|
||||
TableCell tableCell = tableRow.getCell( c );
|
||||
|
||||
if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
|
||||
|
@ -520,9 +618,11 @@ public class WordToFoExtractor {
|
|||
tableCellElement, r == 0, r == tableRows - 1, c == 0,
|
||||
c == rowCells - 1 );
|
||||
|
||||
if (tableCell.isFirstMerged()) {
|
||||
if ( tableCell.isFirstMerged() )
|
||||
{
|
||||
int count = 0;
|
||||
for (int c1 = c; c1 < rowCells; c1++) {
|
||||
for ( int c1 = c; c1 < rowCells; c1++ )
|
||||
{
|
||||
TableCell nextCell = tableRow.getCell( c1 );
|
||||
if ( nextCell.isMerged() )
|
||||
count++;
|
||||
|
@ -531,16 +631,22 @@ public class WordToFoExtractor {
|
|||
}
|
||||
tableCellElement.setAttribute( "number-columns-spanned", ""
|
||||
+ count );
|
||||
} else {
|
||||
if (c == rowCells - 1 && c != maxColumns - 1) {
|
||||
tableCellElement.setAttribute("number-columns-spanned",
|
||||
"" + (maxColumns - c));
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( c == rowCells - 1 && c != maxColumns - 1 )
|
||||
{
|
||||
tableCellElement
|
||||
.setAttribute( "number-columns-spanned", ""
|
||||
+ (maxColumns - c) );
|
||||
}
|
||||
}
|
||||
|
||||
if (tableCell.isFirstVerticallyMerged()) {
|
||||
if ( tableCell.isFirstVerticallyMerged() )
|
||||
{
|
||||
int count = 0;
|
||||
for (int r1 = r; r1 < tableRows; r1++) {
|
||||
for ( int r1 = r; r1 < tableRows; r1++ )
|
||||
{
|
||||
TableRow nextRow = table.getRow( r1 );
|
||||
if ( nextRow.numCells() < c )
|
||||
break;
|
||||
|
@ -557,45 +663,59 @@ public class WordToFoExtractor {
|
|||
processSectionParagraphes( hwpfDocument, tableCellElement,
|
||||
tableCell, thisTableLevel );
|
||||
|
||||
if (!tableCellElement.hasChildNodes()) {
|
||||
if ( !tableCellElement.hasChildNodes() )
|
||||
{
|
||||
tableCellElement.appendChild( createBlock() );
|
||||
}
|
||||
|
||||
tableRowElement.appendChild( tableCellElement );
|
||||
}
|
||||
|
||||
if (tableRow.isTableHeader()) {
|
||||
if ( tableRow.isTableHeader() )
|
||||
{
|
||||
tableHeader.appendChild( tableRowElement );
|
||||
} else {
|
||||
}
|
||||
else
|
||||
{
|
||||
tableBody.appendChild( tableRowElement );
|
||||
}
|
||||
}
|
||||
|
||||
if (tableHeader.hasChildNodes()) {
|
||||
if ( tableHeader.hasChildNodes() )
|
||||
{
|
||||
tableElement.appendChild( tableHeader );
|
||||
}
|
||||
if (tableBody.hasChildNodes()) {
|
||||
if ( tableBody.hasChildNodes() )
|
||||
{
|
||||
tableElement.appendChild( tableBody );
|
||||
} else {
|
||||
System.err.println("Table without body");
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.log(
|
||||
POILogger.WARN,
|
||||
"Table without body starting on offset "
|
||||
+ table.getStartOffset() + " -- "
|
||||
+ table.getEndOffset() );
|
||||
}
|
||||
}
|
||||
|
||||
protected int tryImageWithinField(HWPFDocument hwpfDocument,
|
||||
Paragraph paragraph, int beginMark, Element currentBlock) {
|
||||
protected int tryField( HWPFDocument hwpfDocument, Paragraph paragraph,
|
||||
int currentTableLevel, int beginMark, Element currentBlock )
|
||||
{
|
||||
int separatorMark = -1;
|
||||
int pictureMark = -1;
|
||||
int pictureChar = Integer.MIN_VALUE;
|
||||
int endMark = -1;
|
||||
for (int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++) {
|
||||
for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
|
||||
{
|
||||
CharacterRun characterRun = paragraph.getCharacterRun( c );
|
||||
|
||||
String text = characterRun.text();
|
||||
if ( text.getBytes().length == 0 )
|
||||
continue;
|
||||
|
||||
if (text.getBytes()[0] == FIELD_SEPARATOR_MARK) {
|
||||
if (separatorMark != -1) {
|
||||
if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
|
||||
{
|
||||
if ( separatorMark != -1 )
|
||||
{
|
||||
// double;
|
||||
return beginMark;
|
||||
}
|
||||
|
@ -604,8 +724,10 @@ public class WordToFoExtractor {
|
|||
continue;
|
||||
}
|
||||
|
||||
if (text.getBytes()[0] == FIELD_END_MARK) {
|
||||
if (endMark != -1) {
|
||||
if ( text.getBytes()[0] == FIELD_END_MARK )
|
||||
{
|
||||
if ( endMark != -1 )
|
||||
{
|
||||
// double;
|
||||
return beginMark;
|
||||
}
|
||||
|
@ -614,63 +736,14 @@ public class WordToFoExtractor {
|
|||
break;
|
||||
}
|
||||
|
||||
if (hwpfDocument.getPicturesTable().hasPicture(characterRun)) {
|
||||
if (c != -1) {
|
||||
// double;
|
||||
return beginMark;
|
||||
}
|
||||
|
||||
pictureMark = c;
|
||||
pictureChar = characterRun.text().charAt(0);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (separatorMark == -1 || pictureMark == -1 || endMark == -1)
|
||||
if ( separatorMark == -1 || endMark == -1 )
|
||||
return beginMark;
|
||||
|
||||
final CharacterRun pictureRun = paragraph.getCharacterRun(pictureMark);
|
||||
final Picture picture = hwpfDocument.getPicturesTable().extractPicture(
|
||||
pictureRun, true);
|
||||
|
||||
processImage(currentBlock, pictureChar == 0x01, picture);
|
||||
processField( hwpfDocument, currentBlock, paragraph, currentTableLevel,
|
||||
beginMark, separatorMark, endMark );
|
||||
|
||||
return endMark;
|
||||
}
|
||||
|
||||
/**
|
||||
* Java main() interface to interact with WordToFoExtractor
|
||||
*
|
||||
* <p>
|
||||
* Usage: WordToFoExtractor infile outfile
|
||||
* </p>
|
||||
* Where infile is an input .doc file ( Word 97-2007)
|
||||
* which will be rendered as XSL-FO into outfile
|
||||
*
|
||||
*/
|
||||
public static void main(String[] args) {
|
||||
if (args.length < 2) {
|
||||
System.err.println("Usage: WordToFoExtractor <inputFile.doc> <saveTo.fo>");
|
||||
return;
|
||||
}
|
||||
|
||||
System.out.println("Converting " + args[0]);
|
||||
System.out.println("Saving output to " + args[1]);
|
||||
try {
|
||||
Document doc = WordToFoExtractor.process(new File(args[0]));
|
||||
|
||||
FileWriter out = new FileWriter(args[1]);
|
||||
DOMSource domSource = new DOMSource(doc);
|
||||
StreamResult streamResult = new StreamResult(out);
|
||||
TransformerFactory tf = TransformerFactory.newInstance();
|
||||
Transformer serializer = tf.newTransformer();
|
||||
serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); // TODO set encoding from a command argument
|
||||
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
|
||||
serializer.transform(domSource, streamResult);
|
||||
out.close();
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,95 @@
|
|||
/*
|
||||
* ====================================================================
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ====================================================================
|
||||
*/
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import java.io.StringWriter;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
|
||||
/**
|
||||
* Test cases for {@link WordToFoExtractor}
|
||||
*
|
||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
|
||||
*/
|
||||
public class TestWordToFoExtractor extends TestCase
|
||||
{
|
||||
private static String getFoText( final String sampleFileName )
|
||||
throws Exception
|
||||
{
|
||||
HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
|
||||
.getDocumentInstance().openResourceAsStream( sampleFileName ) );
|
||||
|
||||
WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
|
||||
DocumentBuilderFactory.newInstance().newDocumentBuilder()
|
||||
.newDocument() );
|
||||
wordToFoExtractor.processDocument( hwpfDocument );
|
||||
|
||||
StringWriter stringWriter = new StringWriter();
|
||||
|
||||
Transformer transformer = TransformerFactory.newInstance()
|
||||
.newTransformer();
|
||||
transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
|
||||
transformer.transform(
|
||||
new DOMSource( wordToFoExtractor.getDocument() ),
|
||||
new StreamResult( stringWriter ) );
|
||||
|
||||
String result = stringWriter.toString();
|
||||
return result;
|
||||
}
|
||||
|
||||
public void testHyperlink() throws Exception
|
||||
{
|
||||
final String sampleFileName = "hyperlink.doc";
|
||||
String result = getFoText( sampleFileName );
|
||||
|
||||
assertTrue( result
|
||||
.contains( "<fo:basic-link external-destination=\"http://testuri.org/\">" ) );
|
||||
assertTrue( result.contains( "Hyperlink text" ) );
|
||||
}
|
||||
|
||||
public void testEquation() throws Exception
|
||||
{
|
||||
final String sampleFileName = "equation.doc";
|
||||
String result = getFoText( sampleFileName );
|
||||
|
||||
assertTrue( result
|
||||
.contains( "<!--Image link to '0.emf' can be here-->" ) );
|
||||
}
|
||||
|
||||
public void testPageref() throws Exception
|
||||
{
|
||||
final String sampleFileName = "pageref.doc";
|
||||
String result = getFoText( sampleFileName );
|
||||
|
||||
System.out.println( result );
|
||||
|
||||
assertTrue( result
|
||||
.contains( "<fo:basic-link internal-destination=\"userref\">" ) );
|
||||
assertTrue( result.contains( "1" ) );
|
||||
}
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue