mirror of https://github.com/apache/poi.git
[TIKA-3388] issue with non-ascii chars in file name of embedded OLE object
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1903780 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4df0415621
commit
d00be6e7db
|
@ -32,10 +32,14 @@ import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
|
|||
import org.apache.commons.compress.archivers.zip.ZipFile;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackagePartName;
|
||||
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
||||
import org.apache.poi.poifs.crypt.CipherAlgorithm;
|
||||
import org.apache.poi.poifs.crypt.Decryptor;
|
||||
import org.apache.poi.poifs.crypt.EncryptionInfo;
|
||||
import org.apache.poi.poifs.crypt.HashAlgorithm;
|
||||
import org.apache.poi.poifs.filesystem.Ole10Native;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
|
@ -159,4 +163,22 @@ class TestXWPFBugs {
|
|||
assertEquals(731, document.getParagraphs().size());
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Regression test for TIKA-3388: non-ASCII characters in the file name of an
 * embedded OLE object.
 *
 * Opens the sample document "tika-3388.docx", looks up the embedded part
 * "/word/embeddings/oleObject1.bin", reads that part as a POIFS file system,
 * and checks the file name reported by the Ole10Native record built from it.
 * The expected name contains CJK characters followed by a supplementary
 * character (U+1F596) written as a surrogate pair.
 *
 * @throws Exception propagated from opening or reading the sample document
 */
@Test
|
||||
void tika3388() throws Exception {
|
||||
try (XWPFDocument document = new XWPFDocument(samples.openResourceAsStream("tika-3388.docx"))) {
|
||||
// sanity check on the sample: it is expected to hold a single paragraph
assertEquals(1, document.getParagraphs().size());
|
||||
// locate the embedded OLE object by its part name inside the package
PackagePartName partName = PackagingURIHelper.createPartName("/word/embeddings/oleObject1.bin");
|
||||
PackagePart part = document.getPackage().getPart(partName);
|
||||
assertNotNull(part);
|
||||
// both the part stream and the POIFS file system built on it are closed by this try
try (
|
||||
InputStream partStream = part.getInputStream();
|
||||
POIFSFileSystem poifs = new POIFSFileSystem(partStream)
|
||||
) {
|
||||
Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(poifs);
|
||||
// the full original path must come back with its non-ASCII characters intact
assertEquals("C:\\Users\\ross\\AppData\\Local\\Microsoft\\Windows\\INetCache\\Content.Word\\約翰的測試文件\uD83D\uDD96.msg",
|
||||
ole.getFileName());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,7 +48,7 @@ public class Ole10Native {
|
|||
|
||||
|
||||
public static final String OLE10_NATIVE = "\u0001Ole10Native";
|
||||
private static final Charset ISO1 = StandardCharsets.ISO_8859_1;
|
||||
private static final Charset UTF8 = StandardCharsets.UTF_8;
|
||||
// arbitrarily selected; may need to increase
|
||||
private static final int DEFAULT_MAX_RECORD_LENGTH = 100_000_000;
|
||||
private static int MAX_RECORD_LENGTH = DEFAULT_MAX_RECORD_LENGTH;
|
||||
|
@ -407,14 +407,14 @@ public class Ole10Native {
|
|||
// total size, will be determined later ..
|
||||
|
||||
leos.writeShort(getFlags1());
|
||||
leos.write(getLabel().getBytes(ISO1));
|
||||
leos.write(getLabel().getBytes(UTF8));
|
||||
leos.write(0);
|
||||
leos.write(getFileName().getBytes(ISO1));
|
||||
leos.write(getFileName().getBytes(UTF8));
|
||||
leos.write(0);
|
||||
leos.writeShort(getFlags2());
|
||||
leos.writeShort(getUnknown1());
|
||||
leos.writeInt(getCommand().length() + 1);
|
||||
leos.write(getCommand().getBytes(ISO1));
|
||||
leos.write(getCommand().getBytes(UTF8));
|
||||
leos.write(0);
|
||||
leos.writeInt(getDataSize());
|
||||
leos.write(getDataBuffer());
|
||||
|
|
|
@ -135,13 +135,13 @@ public final class StringUtil {
|
|||
final int offset,
|
||||
final int len) {
|
||||
int len_to_use = Math.min(len, string.length - offset);
|
||||
return new String(string, offset, len_to_use, ISO_8859_1);
|
||||
return new String(string, offset, len_to_use, UTF8);
|
||||
}
|
||||
|
||||
/**
 * Reads a byte buffer sized by {@code nChars} from the given input and
 * decodes it into a String.
 *
 * NOTE(review): this span comes from a rendered diff hunk, so two return
 * statements appear back to back. Presumably the ISO_8859_1 line is the
 * removed version and the UTF8 line is its replacement; confirm against the
 * repository before relying on either.
 *
 * @param in the input to read the bytes from
 * @param nChars the size requested for the buffer, and so the number of bytes read
 * @return the String decoded from the bytes that were read
 */
public static String readCompressedUnicode(LittleEndianInput in, int nChars) {
|
||||
// presumably safelyAllocate rejects sizes above MAX_RECORD_LENGTH; verify in IOUtils
byte[] buf = IOUtils.safelyAllocate(nChars, MAX_RECORD_LENGTH);
|
||||
in.readFully(buf);
|
||||
return new String(buf, ISO_8859_1);
|
||||
// NOTE(review): with a multi-byte charset the result can hold fewer than nChars
// characters, and bytes that are not valid in that charset are replaced during
// decoding; confirm callers expect this
return new String(buf, UTF8);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue