mirror of https://github.com/apache/poi.git
add TOC support
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1189145 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a3145d1d8e
commit
c32d036166
|
@ -78,6 +78,13 @@ public abstract class AbstractWordConverter
|
|||
this.structure = field;
|
||||
}
|
||||
|
||||
/**
 * Creates a structure described only by its boundaries, with no
 * backing object: {@code structure} is explicitly set to {@code null}.
 *
 * @param start
 *            value stored as-is in {@code this.start}; it is also the key
 *            used by {@code compareTo} for ordering
 * @param end
 *            value stored as-is in {@code this.end}
 */
// NOTE(review): start/end are presumably character offsets, like the ones
// taken from a Field -- confirm that callers pass offsets and not
// character-run indexes.
Structure( int start, int end )
|
||||
{
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
// no backing object for a range-only structure
this.structure = null;
|
||||
}
|
||||
|
||||
public int compareTo( Structure o )
|
||||
{
|
||||
return start < o.start ? -1 : start == o.start ? 0 : 1;
|
||||
|
@ -102,6 +109,15 @@ public abstract class AbstractWordConverter
|
|||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( AbstractWordConverter.class );
|
||||
|
||||
/**
 * Field formula of the form {@code HYPERLINK "target"}: optional leading
 * spaces/tabs/CR/LF, the literal {@code HYPERLINK} followed by one space
 * and a double-quoted value, then any trailing text. Group 1 is the text
 * between the quotes; because {@code (.*)} is greedy it extends up to the
 * LAST double quote in the formula.
 */
private static final Pattern PATTERN_HYPERLINK_EXTERNAL = Pattern
|
||||
.compile( "^[ \\t\\r\\n]*HYPERLINK \"(.*)\".*$" );
|
||||
|
||||
/**
 * Field formula of the form {@code HYPERLINK \l "anchor" text}: optional
 * leading whitespace, the literal {@code HYPERLINK \l} and a double-quoted
 * value, exactly one space, then the remainder of the formula. Group 1 is
 * the quoted value (greedy, see above), group 2 is everything after the
 * single space and may be empty.
 */
private static final Pattern PATTERN_HYPERLINK_LOCAL = Pattern
|
||||
.compile( "^[ \\t\\r\\n]*HYPERLINK \\\\l \"(.*)\"[ ](.*)$" );
|
||||
|
||||
/**
 * Field formula of the form {@code PAGEREF name \h}: optional leading
 * whitespace, the literal {@code PAGEREF} and one space, a run of
 * non-space characters (group 1), optional whitespace, the literal
 * {@code \h} switch, then any trailing text.
 */
private static final Pattern PATTERN_PAGEREF = Pattern
|
||||
.compile( "^[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h.*$" );
|
||||
|
||||
private static final byte SPECCHAR_AUTONUMBERED_FOOTNOTE_REFERENCE = 2;
|
||||
|
||||
private static final byte SPECCHAR_DRAWN_OBJECT = 8;
|
||||
|
@ -291,17 +307,38 @@ public abstract class AbstractWordConverter
|
|||
}
|
||||
|
||||
// TODO: dead fields?
|
||||
int skipUntil = -1;
|
||||
for ( int c = 0; c < range.numCharacterRuns(); c++ )
|
||||
{
|
||||
CharacterRun characterRun = range.getCharacterRun( c );
|
||||
if ( characterRun == null )
|
||||
throw new AssertionError();
|
||||
if ( characterRun.getStartOffset() < skipUntil )
|
||||
continue;
|
||||
String text = characterRun.text();
|
||||
if ( text == null || text.length() == 0
|
||||
|| text.charAt( 0 ) != FIELD_BEGIN_MARK )
|
||||
continue;
|
||||
|
||||
Field aliveField = ( (HWPFDocument) wordDocument ).getFields()
|
||||
.getFieldByStartOffset( FieldsDocumentPart.MAIN,
|
||||
characterRun.getStartOffset() );
|
||||
if ( aliveField != null )
|
||||
{
|
||||
addToStructures( structures, new Structure( aliveField ) );
|
||||
skipUntil = aliveField.getFieldEndOffset() + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
int[] separatorEnd = tryDeadField_lookupFieldSeparatorEnd(
|
||||
wordDocument, range, c );
|
||||
if ( separatorEnd != null )
|
||||
{
|
||||
addToStructures( structures, new Structure(
|
||||
characterRun.getStartOffset(),
|
||||
separatorEnd[1] + 1 ) );
|
||||
c = separatorEnd[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -562,6 +599,39 @@ public abstract class AbstractWordConverter
|
|||
Element currentBlock, Range range, int currentTableLevel,
|
||||
int beginMark, int separatorMark, int endMark )
|
||||
{
|
||||
if ( beginMark + 1 < separatorMark && separatorMark + 1 < endMark )
|
||||
{
|
||||
Range formulaRange = new Range( range.getCharacterRun(
|
||||
beginMark + 1 ).getStartOffset(), range.getCharacterRun(
|
||||
separatorMark - 1 ).getEndOffset(), range )
|
||||
{
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "Dead field formula subrange: " + super.toString();
|
||||
}
|
||||
};
|
||||
Range valueRange = new Range( range.getCharacterRun(
|
||||
separatorMark + 1 ).getStartOffset(), range
|
||||
.getCharacterRun( endMark - 1 ).getEndOffset(), range )
|
||||
{
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "Dead field value subrange: " + super.toString();
|
||||
}
|
||||
};
|
||||
String formula = formulaRange.text();
|
||||
final Matcher matcher = PATTERN_HYPERLINK_LOCAL.matcher( formula );
|
||||
if ( matcher.matches() )
|
||||
{
|
||||
String localref = matcher.group( 1 );
|
||||
processPageref( wordDocument, currentBlock, valueRange,
|
||||
currentTableLevel, localref );
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
|
||||
for ( int i = beginMark; i <= endMark; i++ )
|
||||
{
|
||||
|
@ -706,9 +776,7 @@ public abstract class AbstractWordConverter
|
|||
if ( firstSubrange != null )
|
||||
{
|
||||
String formula = firstSubrange.text();
|
||||
Pattern pagerefPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
|
||||
Matcher matcher = pagerefPattern.matcher( formula );
|
||||
Matcher matcher = PATTERN_PAGEREF.matcher( formula );
|
||||
if ( matcher.find() )
|
||||
{
|
||||
String pageref = matcher.group( 1 );
|
||||
|
@ -756,10 +824,8 @@ public abstract class AbstractWordConverter
|
|||
if ( firstSubrange != null )
|
||||
{
|
||||
String formula = firstSubrange.text();
|
||||
Pattern hyperlinkPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
|
||||
Matcher matcher = hyperlinkPattern.matcher( formula );
|
||||
if ( matcher.find() )
|
||||
Matcher matcher = PATTERN_HYPERLINK_EXTERNAL.matcher( formula );
|
||||
if ( matcher.matches() )
|
||||
{
|
||||
String hyperlink = matcher.group( 1 );
|
||||
processHyperlink( wordDocument, currentBlock,
|
||||
|
@ -767,6 +833,30 @@ public abstract class AbstractWordConverter
|
|||
currentTableLevel, hyperlink );
|
||||
return;
|
||||
}
|
||||
matcher.usePattern( PATTERN_HYPERLINK_LOCAL );
|
||||
if ( matcher.matches() )
|
||||
{
|
||||
String hyperlink = matcher.group( 1 );
|
||||
Range textRange = null;
|
||||
String text = matcher.group( 2 );
|
||||
if ( AbstractWordUtils.isNotEmpty( text ) )
|
||||
{
|
||||
textRange = new Range( firstSubrange.getStartOffset()
|
||||
+ matcher.start( 2 ),
|
||||
firstSubrange.getStartOffset()
|
||||
+ matcher.end( 2 ), firstSubrange )
|
||||
{
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "Local hyperlink text";
|
||||
}
|
||||
};
|
||||
}
|
||||
processPageref( wordDocument, currentBlock, textRange,
|
||||
currentTableLevel, hyperlink );
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -817,13 +907,13 @@ public abstract class AbstractWordConverter
|
|||
|
||||
}
|
||||
|
||||
protected abstract void processImage( Element currentBlock,
|
||||
boolean inlined, Picture picture, String url );
|
||||
|
||||
@Internal
|
||||
protected abstract void processImageWithoutPicturesManager(
|
||||
Element currentBlock, boolean inlined, Picture picture );
|
||||
|
||||
protected abstract void processImage( Element currentBlock,
|
||||
boolean inlined, Picture picture, String url );
|
||||
|
||||
protected abstract void processLineBreak( Element block,
|
||||
CharacterRun characterRun );
|
||||
|
||||
|
@ -1021,6 +1111,20 @@ public abstract class AbstractWordConverter
|
|||
|
||||
/**
 * Tries to handle the character runs starting at {@code beginMark} as a
 * "dead" field, i.e. one located by scanning for its separator and end
 * marks rather than by a lookup in the document's field table.
 * <p>
 * The separator and end positions are obtained from
 * {@code tryDeadField_lookupFieldSeparatorEnd}. When that lookup returns
 * {@code null}, nothing is processed. Otherwise {@code processDeadField}
 * is invoked with the begin, separator and end positions.
 *
 * @param wordDocument
 *            document passed through to the lookup and processing calls
 * @param range
 *            range whose character runs are indexed by {@code beginMark}
 * @param currentTableLevel
 *            passed through to {@code processDeadField}
 * @param beginMark
 *            index of the character run at which the candidate field begins
 * @param currentBlock
 *            element passed through to {@code processDeadField}
 * @return {@code beginMark} unchanged when no complete field was found,
 *         otherwise the second element of the lookup result (the end mark)
 */
protected int tryDeadField( HWPFDocumentCore wordDocument, Range range,
|
||||
int currentTableLevel, int beginMark, Element currentBlock )
|
||||
{
|
||||
// lookup result layout: [0] = separator mark, [1] = end mark
int[] separatorEnd = tryDeadField_lookupFieldSeparatorEnd(
|
||||
wordDocument, range, beginMark );
|
||||
if ( separatorEnd == null )
|
||||
// no well-formed field here: report the begin position back unchanged
return beginMark;
|
||||
|
||||
processDeadField( wordDocument, currentBlock, range, currentTableLevel,
|
||||
beginMark, separatorEnd[0], separatorEnd[1] );
|
||||
|
||||
return separatorEnd[1];
|
||||
}
|
||||
|
||||
private int[] tryDeadField_lookupFieldSeparatorEnd(
|
||||
HWPFDocumentCore wordDocument, Range range, int beginMark )
|
||||
{
|
||||
int separatorMark = -1;
|
||||
int endMark = -1;
|
||||
|
@ -1032,28 +1136,24 @@ public abstract class AbstractWordConverter
|
|||
if ( text.getBytes().length == 0 )
|
||||
continue;
|
||||
|
||||
if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
|
||||
final byte firstByte = text.getBytes()[0];
|
||||
if ( firstByte == FIELD_BEGIN_MARK )
|
||||
{
|
||||
// nested?
|
||||
Field possibleField = processDeadField( wordDocument, range,
|
||||
currentTableLevel, characterRun.getStartOffset(),
|
||||
currentBlock );
|
||||
if ( possibleField != null )
|
||||
int[] nested = tryDeadField_lookupFieldSeparatorEnd(
|
||||
wordDocument, range, c );
|
||||
if ( nested != null )
|
||||
{
|
||||
c = possibleField.getFieldEndOffset();
|
||||
}
|
||||
else
|
||||
{
|
||||
continue;
|
||||
c = nested[1];
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
|
||||
if ( firstByte == FIELD_SEPARATOR_MARK )
|
||||
{
|
||||
if ( separatorMark != -1 )
|
||||
{
|
||||
// double;
|
||||
return beginMark;
|
||||
// double; incorrect format
|
||||
return null;
|
||||
}
|
||||
|
||||
separatorMark = c;
|
||||
|
@ -1065,22 +1165,18 @@ public abstract class AbstractWordConverter
|
|||
if ( endMark != -1 )
|
||||
{
|
||||
// double;
|
||||
return beginMark;
|
||||
return null;
|
||||
}
|
||||
|
||||
endMark = c;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( separatorMark == -1 || endMark == -1 )
|
||||
return beginMark;
|
||||
return null;
|
||||
|
||||
processDeadField( wordDocument, currentBlock, range, currentTableLevel,
|
||||
beginMark, separatorMark, endMark );
|
||||
|
||||
return endMark;
|
||||
return new int[] { separatorMark, endMark };
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue