mirror of https://github.com/apache/poi.git
Bugzilla 53205 - Fix some parsing errors and encoding issues in HDGF
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1365638 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f8988b11af
commit
302a48a25b
|
@ -34,6 +34,7 @@
|
|||
|
||||
<changes>
|
||||
<release version="3.9-beta1" date="2012-??-??">
|
||||
<action dev="poi-developers" type="fix">53205 - Fix some parsing errors and encoding issues in HDGF </action>
|
||||
<action dev="poi-developers" type="add">53204 - Improved performance of PageSettingsBlock in HSSF </action>
|
||||
<action dev="poi-developers" type="add">53500 - Getter for repeating rows and columns</action>
|
||||
<action dev="poi-developers" type="fix">53369 - Fixed tests failing on JDK 1.7</action>
|
||||
|
|
|
@ -161,6 +161,7 @@ public final class Chunk {
|
|||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
// Process
|
||||
switch(type) {
|
||||
// Types 0->7 = a flag at bit 0->7
|
||||
|
@ -199,8 +200,8 @@ public final class Chunk {
|
|||
endsAt = contents.length;
|
||||
}
|
||||
|
||||
int strLen = (endsAt-startsAt) / 2;
|
||||
command.value = StringUtil.getFromUnicodeLE(contents, startsAt, strLen);
|
||||
int strLen = endsAt - startsAt;
|
||||
command.value = new String(contents, startsAt, strLen, header.getChunkCharset().name());
|
||||
break;
|
||||
case 25:
|
||||
command.value = Short.valueOf(
|
||||
|
@ -226,6 +227,11 @@ public final class Chunk {
|
|||
logger.log(POILogger.INFO,
|
||||
"Command of type " + type + " not processed!");
|
||||
}
|
||||
}
|
||||
catch (Exception e) {
|
||||
logger.log(POILogger.ERROR, "Unexpected error processing command, ignoring and continuing. Command: " +
|
||||
command, e);
|
||||
}
|
||||
|
||||
// Add to the array
|
||||
commands.add(command);
|
||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.poi.hdgf.chunks;
|
|||
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
|
||||
* A chunk header
|
||||
*/
|
||||
|
@ -80,6 +82,7 @@ public abstract class ChunkHeader {
|
|||
public abstract int getSizeInBytes();
|
||||
public abstract boolean hasTrailer();
|
||||
public abstract boolean hasSeparator();
|
||||
public abstract Charset getChunkCharset();
|
||||
|
||||
/**
|
||||
* Returns the ID/IX of the chunk
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
|
||||
package org.apache.poi.hdgf.chunks;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
|
||||
* A chunk header from v11+
|
||||
*/
|
||||
|
@ -42,4 +44,9 @@ public final class ChunkHeaderV11 extends ChunkHeaderV6 {
|
|||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Charset getChunkCharset() {
|
||||
return Charset.forName("UTF-16LE");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
|
||||
package org.apache.poi.hdgf.chunks;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
|
||||
* A chunk header from v4 or v5
|
||||
*/
|
||||
|
@ -54,4 +56,9 @@ public final class ChunkHeaderV4V5 extends ChunkHeader {
|
|||
// V4 and V5 never have separators
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Charset getChunkCharset() {
|
||||
return Charset.forName("ASCII");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
|
||||
package org.apache.poi.hdgf.chunks;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
/**
|
||||
* A chunk header from v6
|
||||
*/
|
||||
|
@ -59,4 +61,9 @@ public class ChunkHeaderV6 extends ChunkHeader {
|
|||
// V6 never has separators
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Charset getChunkCharset() {
|
||||
return Charset.forName("ASCII");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,6 +52,7 @@ public final class ChunkStream extends Stream {
|
|||
|
||||
int pos = 0;
|
||||
byte[] contents = getStore().getContents();
|
||||
try {
|
||||
while(pos < contents.length) {
|
||||
// Ensure we have enough data to create a chunk from
|
||||
int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion());
|
||||
|
@ -65,6 +66,11 @@ public final class ChunkStream extends Stream {
|
|||
pos = contents.length;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception e)
|
||||
{
|
||||
System.err.println("Failed to create chunk at " + pos + ", ignoring rest of data." + e);
|
||||
}
|
||||
|
||||
chunks = chunksA.toArray(new Chunk[chunksA.size()]);
|
||||
}
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
package org.apache.poi.hdgf;
|
||||
|
||||
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
||||
import org.apache.poi.hdgf.streams.PointerContainingStream;
|
||||
import org.apache.poi.hdgf.streams.TrailerStream;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
|
@ -88,4 +89,28 @@ public final class TestHDGFCore extends TestCase {
|
|||
HDGFDiagram hdgf = new HDGFDiagram(fs);
|
||||
assertNotNull(hdgf);
|
||||
}
|
||||
|
||||
public void testV6NonUtf16LE() throws Exception {
|
||||
fs = new POIFSFileSystem(_dgTests.openResourceAsStream("v6-non-utf16le.vsd"));
|
||||
|
||||
HDGFDiagram hdgf = new HDGFDiagram(fs);
|
||||
assertNotNull(hdgf);
|
||||
|
||||
VisioTextExtractor textExtractor = new VisioTextExtractor(hdgf);
|
||||
String text = textExtractor.getText().replace("\u0000", "").trim();
|
||||
|
||||
assertEquals("Table\n\n\nPropertySheet\n\n\n\nPropertySheetField", text);
|
||||
}
|
||||
|
||||
public void testUtf16LE() throws Exception {
|
||||
fs = new POIFSFileSystem(_dgTests.openResourceAsStream("Test_Visio-Some_Random_Text.vsd"));
|
||||
|
||||
HDGFDiagram hdgf = new HDGFDiagram(fs);
|
||||
assertNotNull(hdgf);
|
||||
|
||||
VisioTextExtractor textExtractor = new VisioTextExtractor(hdgf);
|
||||
String text = textExtractor.getText().trim();
|
||||
|
||||
assertEquals("text\nView\nTest View\nI am a test view\nSome random text, on a page", text);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue