fix test case for 45473: calculate PAPX boundaries based on char positions rather than on previously read byte positions (which are outdated); fix boundary checks (again)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143753 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-07 10:39:27 +00:00
parent 1757e4af53
commit 5911ff3bdb
7 changed files with 103 additions and 24 deletions

View File

@ -18,6 +18,15 @@
package org.apache.poi.hwpf.model; package org.apache.poi.hwpf.model;
public interface CharIndexTranslator { public interface CharIndexTranslator {
/**
 * Calculates the byte index of the given char index, i.e. the opposite
 * direction of {@code getCharIndex}.
 *
 * @param charPos
 *            the char position to translate
 * @return the byte index that corresponds to {@code charPos}
 */
int getByteIndex( int charPos );
/** /**
* Calculates the char index of the given byte index. * Calculates the char index of the given byte index.
* Look forward if index is not in table * Look forward if index is not in table

View File

@ -74,6 +74,11 @@ public final class OldSectionTable extends SectionTable
this.tpt = tpt; this.tpt = tpt;
} }
/**
 * Returns the byte index for the given char position. This implementation
 * performs no translation: the char position is returned unchanged, just
 * as {@code getCharIndex} in this class returns its byte position
 * unchanged.
 *
 * @param charPos
 *            the char position
 * @return the same value as {@code charPos}
 */
public int getByteIndex( int charPos )
{
return charPos;
}
public int getCharIndex(int bytePos, int startCP) { public int getCharIndex(int bytePos, int startCP) {
return bytePos; return bytePos;
} }

View File

@ -17,13 +17,13 @@
package org.apache.poi.hwpf.model; package org.apache.poi.hwpf.model;
import java.util.ArrayList;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.ArrayList;
import org.apache.poi.hwpf.model.io.*; import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
@ -223,7 +223,7 @@ public class PAPBinTable
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(_dataStream); PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(_dataStream);
pfkp.fill(overflow); pfkp.fill(overflow);
byte[] bufFkp = pfkp.toByteArray(fcMin); byte[] bufFkp = pfkp.toByteArray(tpt, fcMin);
docStream.write(bufFkp); docStream.write(bufFkp);
overflow = pfkp.getOverflow(); overflow = pfkp.getOverflow();

View File

@ -17,11 +17,11 @@
package org.apache.poi.hwpf.model; package org.apache.poi.hwpf.model;
import org.apache.poi.util.LittleEndian;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
import org.apache.poi.util.LittleEndian;
/** /**
* Represents a PAP FKP. The style properties for paragraph and character runs * Represents a PAP FKP. The style properties for paragraph and character runs
@ -137,7 +137,7 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
* @param fcMin The file offset in the main stream where text begins. * @param fcMin The file offset in the main stream where text begins.
* @return A byte array representing this data structure. * @return A byte array representing this data structure.
*/ */
protected byte[] toByteArray(int fcMin) protected byte[] toByteArray(CharIndexTranslator translator, int fcMin)
{ {
byte[] buf = new byte[512]; byte[] buf = new byte[512];
int size = _papxList.size(); int size = _papxList.size();
@ -152,7 +152,7 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
int index = 0; int index = 0;
for (; index < size; index++) for (; index < size; index++)
{ {
byte[] grpprl = ((PAPX)_papxList.get(index)).getGrpprl(); byte[] grpprl = _papxList.get(index).getGrpprl();
int grpprlLength = grpprl.length; int grpprlLength = grpprl.length;
// is grpprl huge? // is grpprl huge?
@ -255,7 +255,10 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2)); grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2));
grpprlOffset -= (grpprlOffset % 2); grpprlOffset -= (grpprlOffset % 2);
} }
LittleEndian.putInt(buf, fcOffset, papx.getStartBytes() + fcMin); // LittleEndian.putInt( buf, fcOffset,
// papx.getStartBytes() );
LittleEndian.putInt( buf, fcOffset,
translator.getByteIndex( papx.getStart() ) );
buf[bxOffset] = (byte)(grpprlOffset/2); buf[bxOffset] = (byte)(grpprlOffset/2);
System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length); System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length);
@ -283,7 +286,9 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
} }
LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin); // LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
LittleEndian.putInt( buf, fcOffset,
translator.getByteIndex( papx.getEnd() ) );
return buf; return buf;
} }

View File

@ -123,6 +123,31 @@ public final class PAPX extends BytePropertyNode<PAPX> {
return (SprmBuffer)_buf; return (SprmBuffer)_buf;
} }
/**
 * Returns the end position in bytes, exactly as reported by the
 * superclass; overridden only to mark the accessor as deprecated.
 *
 * @return the value of {@code super.getEndBytes()}
 * @deprecated Although byte offsets are what is actually stored in the
 *             file, it is advised to use char positions for all
 *             operations, including save operations, because only char
 *             positions are preserved.
 */
@Deprecated
@Override
public int getEndBytes()
{
return super.getEndBytes();
}
/**
 * Returns the start position in bytes, exactly as reported by the
 * superclass; overridden only to mark the accessor as deprecated.
 *
 * @return the value of {@code super.getStartBytes()}
 * @deprecated Although byte offsets are what is actually stored in the
 *             file, it is advised to use char positions for all
 *             operations, including save operations, because only char
 *             positions are preserved.
 */
@Deprecated
@Override
public int getStartBytes()
{
return super.getStartBytes();
}
public ParagraphProperties getParagraphProperties(StyleSheet ss) public ParagraphProperties getParagraphProperties(StyleSheet ss)
{ {
if(ss == null) { if(ss == null) {

View File

@ -179,14 +179,22 @@ public class SectionTable
// add the section descriptor bytes to the PlexOfCps. // add the section descriptor bytes to the PlexOfCps.
/* original line */
// original line - // GenericPropertyNode property = new
//GenericPropertyNode property = new GenericPropertyNode(sepx.getStart(), sepx.getEnd(), sed.toByteArray()); // GenericPropertyNode(sepx.getStart(), sepx.getEnd(),
// sed.toByteArray());
// Line using Ryan's FCtoCP() conversion method - /*
// unable to observe any effect on our testcases when using this code - piers * Line using Ryan's FCtoCP() conversion method - unable to observe
GenericPropertyNode property = new GenericPropertyNode(tpt.getCharIndex(sepx.getStartBytes()), tpt.getCharIndex(sepx.getEndBytes()), sed.toByteArray()); * any effect on our testcases when using this code - piers
*/
/*
* Actually, there is an effect on Bug45743.doc: write-out/read-back
* changes the byte offsets of chars (but preserves string offsets) -
* sergey
*/
GenericPropertyNode property = new GenericPropertyNode(
tpt.getCharIndex( sepx.getStartBytes() ),
tpt.getCharIndex( sepx.getEndBytes() ), sed.toByteArray() );
plex.addProperty(property); plex.addProperty(property);

View File

@ -17,15 +17,15 @@
package org.apache.poi.hwpf.model; package org.apache.poi.hwpf.model;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.poifs.common.POIFSConstants;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator; import java.util.Comparator;
import java.util.List; import java.util.List;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.poifs.common.POIFSConstants;
/** /**
* The piece table for matching up character positions to bits of text. This * The piece table for matching up character positions to bits of text. This
* mostly works in bytes, but the TextPieces themselves work in characters. This * mostly works in bytes, but the TextPieces themselves work in characters. This
@ -197,6 +197,33 @@ public class TextPieceTable implements CharIndexTranslator {
return false; return false;
} }
/**
 * Calculates the byte index of the given char position by walking the
 * text pieces in {@code _textPieces} order.
 * <p>
 * For each piece the candidate result is the piece's file position plus
 * the number of chars into the piece (capped at the piece's end),
 * multiplied by 2 for unicode pieces and by 1 otherwise. The walk stops at
 * the first piece whose end is not before {@code charPos}. If every piece
 * ends before {@code charPos}, the byte position of the end of the last
 * visited piece is returned; if there are no pieces, 0 is returned.
 *
 * @param charPos
 *            the char position to translate
 * @return the byte index computed as described above
 */
public int getByteIndex( int charPos )
{
    int byteIndex = 0;
    for ( TextPiece piece : _textPieces )
    {
        final int pieceEnd = piece.getEnd();

        // Never count past the end of the current piece: positions beyond
        // it are resolved by a later piece (or fall back to this end).
        final int charsIntoPiece = Math.min( charPos, pieceEnd )
                - piece.getStart();
        final int bytesPerChar = piece.isUnicode() ? 2 : 1;

        byteIndex = piece.getPieceDescriptor().getFilePosition()
                + charsIntoPiece * bytesPerChar;

        // charPos lies inside this piece or exactly at its end: done.
        if ( charPos <= pieceEnd )
        {
            break;
        }
    }
    return byteIndex;
}
public int getCharIndex(int bytePos) { public int getCharIndex(int bytePos) {
return getCharIndex(bytePos, 0); return getCharIndex(bytePos, 0);
} }
@ -297,7 +324,7 @@ public class TextPieceTable implements CharIndexTranslator {
for(TextPiece tp : _textPiecesFCOrder) { for(TextPiece tp : _textPiecesFCOrder) {
int pieceStart = tp.getPieceDescriptor().getFilePosition(); int pieceStart = tp.getPieceDescriptor().getFilePosition();
if (startBytePos > pieceStart + tp.bytesLength()) { if (startBytePos >= pieceStart + tp.bytesLength()) {
continue; continue;
} }