mirror of https://github.com/apache/poi.git
convert tabs to spaces
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1890125 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
212a7b9655
commit
0eb475ee3a
|
@ -46,7 +46,7 @@ public abstract class AbstractFileHandler implements FileHandler {
|
||||||
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<>();
|
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<>();
|
||||||
static {
|
static {
|
||||||
// password protected files without password
|
// password protected files without password
|
||||||
// ... currently none ...
|
// ... currently none ...
|
||||||
|
|
||||||
// unsupported file-types, no supported OLE2 parts
|
// unsupported file-types, no supported OLE2 parts
|
||||||
EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat");
|
EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat");
|
||||||
|
@ -75,9 +75,9 @@ public abstract class AbstractFileHandler implements FileHandler {
|
||||||
/* Did fail for some documents with special XML contents...
|
/* Did fail for some documents with special XML contents...
|
||||||
try {
|
try {
|
||||||
OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(),
|
OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(),
|
||||||
"/tmp/pretty-" + file.getName() });
|
"/tmp/pretty-" + file.getName() });
|
||||||
} catch (ZipException e) {
|
} catch (ZipException e) {
|
||||||
// ignore, not a Zip/OOXML file
|
// ignore, not a Zip/OOXML file
|
||||||
}*/
|
}*/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,128 +38,128 @@ import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class BaseIntegrationTest {
|
public class BaseIntegrationTest {
|
||||||
private final File rootDir;
|
private final File rootDir;
|
||||||
private final String file;
|
private final String file;
|
||||||
private FileHandler handler;
|
private FileHandler handler;
|
||||||
|
|
||||||
public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
|
public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
|
||||||
this.rootDir = rootDir;
|
this.rootDir = rootDir;
|
||||||
this.file = file;
|
this.file = file;
|
||||||
this.handler = handler;
|
this.handler = handler;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Keep this public so it can be used by the regression-tests
|
* Keep this public so it can be used by the regression-tests
|
||||||
*/
|
*/
|
||||||
public void test() throws Exception {
|
public void test() throws Exception {
|
||||||
assertNotNull( handler, "Unknown file extension for file: " + file );
|
assertNotNull( handler, "Unknown file extension for file: " + file );
|
||||||
testOneFile(new File(rootDir, file));
|
testOneFile(new File(rootDir, file));
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void testOneFile(File inputFile) throws Exception {
|
protected void testOneFile(File inputFile) throws Exception {
|
||||||
try {
|
try {
|
||||||
handleFile(inputFile);
|
handleFile(inputFile);
|
||||||
} catch (OfficeXmlFileException e) {
|
} catch (OfficeXmlFileException e) {
|
||||||
// switch XWPF and HWPF and so forth depending on the error message
|
// switch XWPF and HWPF and so forth depending on the error message
|
||||||
handleWrongOLE2XMLExtension(inputFile, e);
|
handleWrongOLE2XMLExtension(inputFile, e);
|
||||||
} catch (OldFileFormatException e) {
|
} catch (OldFileFormatException e) {
|
||||||
// Not even text extraction is supported for these: handler.handleExtracting(inputFile);
|
// Not even text extraction is supported for these: handler.handleExtracting(inputFile);
|
||||||
assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" );
|
assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" );
|
||||||
} catch (EncryptedDocumentException e) {
|
} catch (EncryptedDocumentException e) {
|
||||||
// Do not try to read encrypted files
|
// Do not try to read encrypted files
|
||||||
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
|
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
|
||||||
} catch (ZipException e) {
|
} catch (ZipException e) {
|
||||||
// some files are corrupted
|
// some files are corrupted
|
||||||
if (e.getMessage().equals("unexpected EOF") || e.getMessage().equals("Truncated ZIP file")) {
|
if (e.getMessage().equals("unexpected EOF") || e.getMessage().equals("Truncated ZIP file")) {
|
||||||
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
|
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
|
||||||
}
|
}
|
||||||
|
|
||||||
throw e;
|
throw e;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// ignore some other ways of corrupted files
|
// ignore some other ways of corrupted files
|
||||||
String message = e.getMessage();
|
String message = e.getMessage();
|
||||||
if(message != null && message.contains("Truncated ZIP file")) {
|
if(message != null && message.contains("Truncated ZIP file")) {
|
||||||
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
|
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
|
||||||
}
|
}
|
||||||
|
|
||||||
// sometimes binary format has XML-format-extension...
|
// sometimes binary format has XML-format-extension...
|
||||||
if(message != null && message.contains("rong file format or file extension for OO XML file")) {
|
if(message != null && message.contains("rong file format or file extension for OO XML file")) {
|
||||||
handleWrongOLE2XMLExtension(inputFile, e);
|
handleWrongOLE2XMLExtension(inputFile, e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
throw e;
|
throw e;
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
// ignore errors for documents with incorrect extension
|
// ignore errors for documents with incorrect extension
|
||||||
String message = e.getMessage();
|
String message = e.getMessage();
|
||||||
if(message != null && (message.equals("The document is really a RTF file") ||
|
if(message != null && (message.equals("The document is really a RTF file") ||
|
||||||
message.equals("The document is really a PDF file") ||
|
message.equals("The document is really a PDF file") ||
|
||||||
message.equals("The document is really a HTML file"))) {
|
message.equals("The document is really a HTML file"))) {
|
||||||
assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
|
assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
|
||||||
}
|
}
|
||||||
|
|
||||||
if(message != null && message.equals("The document is really a OOXML file")) {
|
if(message != null && message.equals("The document is really a OOXML file")) {
|
||||||
handleWrongOLE2XMLExtension(inputFile, e);
|
handleWrongOLE2XMLExtension(inputFile, e);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
handler.handleExtracting(inputFile);
|
handler.handleExtracting(inputFile);
|
||||||
} catch (EncryptedDocumentException e) {
|
} catch (EncryptedDocumentException e) {
|
||||||
// Do not try to read encrypted files
|
// Do not try to read encrypted files
|
||||||
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
|
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception {
|
void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception {
|
||||||
// we sometimes have wrong extensions, so for some exceptions we try to handle it
|
// we sometimes have wrong extensions, so for some exceptions we try to handle it
|
||||||
// with the correct FileHandler instead
|
// with the correct FileHandler instead
|
||||||
String message = e.getMessage();
|
String message = e.getMessage();
|
||||||
|
|
||||||
// ignore some file-types that we do not want to handle here
|
// ignore some file-types that we do not want to handle here
|
||||||
assumeFalse( message != null && (message.equals("The document is really a RTF file") ||
|
assumeFalse( message != null && (message.equals("The document is really a RTF file") ||
|
||||||
message.equals("The document is really a PDF file") ||
|
message.equals("The document is really a PDF file") ||
|
||||||
message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
|
message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
|
||||||
|
|
||||||
if(message != null && (message.equals("The document is really a XLS file"))) {
|
if(message != null && (message.equals("The document is really a XLS file"))) {
|
||||||
handler = new HSSFFileHandler();
|
handler = new HSSFFileHandler();
|
||||||
} else if(message != null && (message.equals("The document is really a PPT file"))) {
|
} else if(message != null && (message.equals("The document is really a PPT file"))) {
|
||||||
handler = new HSLFFileHandler();
|
handler = new HSLFFileHandler();
|
||||||
} else if(message != null && (message.equals("The document is really a DOC file"))) {
|
} else if(message != null && (message.equals("The document is really a DOC file"))) {
|
||||||
handler = new HWPFFileHandler();
|
handler = new HWPFFileHandler();
|
||||||
} else if(message != null && (message.equals("The document is really a VSD file"))) {
|
} else if(message != null && (message.equals("The document is really a VSD file"))) {
|
||||||
handler = new HDGFFileHandler();
|
handler = new HDGFFileHandler();
|
||||||
|
|
||||||
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
|
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
|
||||||
} else if (handler instanceof HWPFFileHandler) {
|
} else if (handler instanceof HWPFFileHandler) {
|
||||||
handler = new XWPFFileHandler();
|
handler = new XWPFFileHandler();
|
||||||
} else if (handler instanceof HSSFFileHandler) {
|
} else if (handler instanceof HSSFFileHandler) {
|
||||||
handler = new XSSFFileHandler();
|
handler = new XSSFFileHandler();
|
||||||
} else if (handler instanceof HSLFFileHandler) {
|
} else if (handler instanceof HSLFFileHandler) {
|
||||||
handler = new XSLFFileHandler();
|
handler = new XSLFFileHandler();
|
||||||
|
|
||||||
// and the other way around, use HWPF instead of XWPF and so forth
|
// and the other way around, use HWPF instead of XWPF and so forth
|
||||||
} else if(handler instanceof XWPFFileHandler) {
|
} else if(handler instanceof XWPFFileHandler) {
|
||||||
handler = new HWPFFileHandler();
|
handler = new HWPFFileHandler();
|
||||||
} else if(handler instanceof XSSFFileHandler) {
|
} else if(handler instanceof XSSFFileHandler) {
|
||||||
handler = new HSSFFileHandler();
|
handler = new HSSFFileHandler();
|
||||||
} else if(handler instanceof XSLFFileHandler) {
|
} else if(handler instanceof XSLFFileHandler) {
|
||||||
handler = new HSLFFileHandler();
|
handler = new HSLFFileHandler();
|
||||||
} else {
|
} else {
|
||||||
// nothing matched => throw the exception to the outside
|
// nothing matched => throw the exception to the outside
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
// we found a different handler to try processing again
|
// we found a different handler to try processing again
|
||||||
handleFile(inputFile);
|
handleFile(inputFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleFile(File inputFile) throws Exception {
|
private void handleFile(File inputFile) throws Exception {
|
||||||
try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
|
try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
|
||||||
handler.handleFile(newStream, inputFile.getAbsolutePath());
|
handler.handleFile(newStream, inputFile.getAbsolutePath());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,28 +24,28 @@ import java.io.InputStream;
|
||||||
* used in the stress testing.
|
* used in the stress testing.
|
||||||
*/
|
*/
|
||||||
public interface FileHandler {
|
public interface FileHandler {
|
||||||
/**
|
/**
|
||||||
* The FileHandler receives a stream ready for reading the
|
* The FileHandler receives a stream ready for reading the
|
||||||
* file and should handle the content that is provided and
|
* file and should handle the content that is provided and
|
||||||
* try to read and interpret the data.
|
* try to read and interpret the data.
|
||||||
*
|
*
|
||||||
* Closing is handled by the framework outside this call.
|
* Closing is handled by the framework outside this call.
|
||||||
*
|
*
|
||||||
* @param stream The input stream to read the file from.
|
* @param stream The input stream to read the file from.
|
||||||
* @param path the relative path to the file
|
* @param path the relative path to the file
|
||||||
* @throws Exception If an error happens in the file-specific handler
|
* @throws Exception If an error happens in the file-specific handler
|
||||||
*/
|
*/
|
||||||
void handleFile(InputStream stream, String path) throws Exception;
|
void handleFile(InputStream stream, String path) throws Exception;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ensures that extracting text from the given file
|
* Ensures that extracting text from the given file
|
||||||
* is returning some text.
|
* is returning some text.
|
||||||
*/
|
*/
|
||||||
void handleExtracting(File file) throws Exception;
|
void handleExtracting(File file) throws Exception;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Allows to perform some additional work, e.g. run
|
* Allows to perform some additional work, e.g. run
|
||||||
* some of the example applications
|
* some of the example applications
|
||||||
*/
|
*/
|
||||||
void handleAdditional(File file) throws Exception;
|
void handleAdditional(File file) throws Exception;
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,47 +32,47 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
class HDGFFileHandler extends POIFSFileHandler {
|
class HDGFFileHandler extends POIFSFileHandler {
|
||||||
@Override
|
@Override
|
||||||
public void handleFile(InputStream stream, String path) throws IOException {
|
public void handleFile(InputStream stream, String path) throws IOException {
|
||||||
POIFSFileSystem poifs = new POIFSFileSystem(stream);
|
POIFSFileSystem poifs = new POIFSFileSystem(stream);
|
||||||
HDGFDiagram diagram = new HDGFDiagram(poifs);
|
HDGFDiagram diagram = new HDGFDiagram(poifs);
|
||||||
Stream[] topLevelStreams = diagram.getTopLevelStreams();
|
Stream[] topLevelStreams = diagram.getTopLevelStreams();
|
||||||
assertNotNull(topLevelStreams);
|
assertNotNull(topLevelStreams);
|
||||||
for(Stream str : topLevelStreams) {
|
for(Stream str : topLevelStreams) {
|
||||||
assertTrue(str.getPointer().getLength() >= 0);
|
assertTrue(str.getPointer().getLength() >= 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
TrailerStream trailerStream = diagram.getTrailerStream();
|
TrailerStream trailerStream = diagram.getTrailerStream();
|
||||||
assertNotNull(trailerStream);
|
assertNotNull(trailerStream);
|
||||||
assertTrue(trailerStream.getPointer().getLength() >= 0);
|
assertTrue(trailerStream.getPointer().getLength() >= 0);
|
||||||
diagram.close();
|
diagram.close();
|
||||||
poifs.close();
|
poifs.close();
|
||||||
|
|
||||||
// writing is not yet implemented... handlePOIDocument(diagram);
|
// writing is not yet implemented... handlePOIDocument(diagram);
|
||||||
}
|
}
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Override
|
@Override
|
||||||
@Test
|
@Test
|
||||||
void test() throws Exception {
|
void test() throws Exception {
|
||||||
File file = new File("test-data/diagram/44501.vsd");
|
File file = new File("test-data/diagram/44501.vsd");
|
||||||
|
|
||||||
InputStream stream = new FileInputStream(file);
|
InputStream stream = new FileInputStream(file);
|
||||||
try {
|
try {
|
||||||
handleFile(stream, file.getPath());
|
handleFile(stream, file.getPath());
|
||||||
} finally {
|
} finally {
|
||||||
stream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
handleExtracting(file);
|
handleExtracting(file);
|
||||||
|
|
||||||
stream = new FileInputStream(file);
|
stream = new FileInputStream(file);
|
||||||
try {
|
try {
|
||||||
try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) {
|
try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) {
|
||||||
assertNotNull(extractor.getText());
|
assertNotNull(extractor.getText());
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
stream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -34,58 +34,58 @@ import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
class HMEFFileHandler extends AbstractFileHandler {
|
class HMEFFileHandler extends AbstractFileHandler {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void handleExtracting(File file) throws Exception {
|
public void handleExtracting(File file) throws Exception {
|
||||||
FileMagic fm = FileMagic.valueOf(file);
|
FileMagic fm = FileMagic.valueOf(file);
|
||||||
if (fm == FileMagic.OLE2) {
|
if (fm == FileMagic.OLE2) {
|
||||||
super.handleExtracting(file);
|
super.handleExtracting(file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void handleFile(InputStream stream, String path) throws Exception {
|
public void handleFile(InputStream stream, String path) throws Exception {
|
||||||
HMEFMessage msg = new HMEFMessage(stream);
|
HMEFMessage msg = new HMEFMessage(stream);
|
||||||
|
|
||||||
// there are test-files that have no body...
|
// there are test-files that have no body...
|
||||||
String[] HTML_BODY = {
|
String[] HTML_BODY = {
|
||||||
"Testing TNEF Message", "TNEF test message with attachments", "Test"
|
"Testing TNEF Message", "TNEF test message with attachments", "Test"
|
||||||
};
|
};
|
||||||
String bodyStr;
|
String bodyStr;
|
||||||
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) {
|
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) {
|
||||||
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
|
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
|
||||||
assertNotNull(bodyHtml);
|
assertNotNull(bodyHtml);
|
||||||
bodyStr = new String(bodyHtml.getData(), getEncoding(msg));
|
bodyStr = new String(bodyHtml.getData(), getEncoding(msg));
|
||||||
} else {
|
} else {
|
||||||
bodyStr = msg.getBody();
|
bodyStr = msg.getBody();
|
||||||
}
|
}
|
||||||
assertNotNull( bodyStr, "Body is not set" );
|
assertNotNull( bodyStr, "Body is not set" );
|
||||||
assertNotNull( msg.getSubject(), "Subject is not set" );
|
assertNotNull( msg.getSubject(), "Subject is not set" );
|
||||||
}
|
}
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Test
|
@Test
|
||||||
void test() throws Exception {
|
void test() throws Exception {
|
||||||
String path = "test-data/hmef/quick-winmail.dat";
|
String path = "test-data/hmef/quick-winmail.dat";
|
||||||
try (InputStream stream = new FileInputStream(path)) {
|
try (InputStream stream = new FileInputStream(path)) {
|
||||||
handleFile(stream, path);
|
handleFile(stream, path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getEncoding(HMEFMessage tnefDat) {
|
private String getEncoding(HMEFMessage tnefDat) {
|
||||||
TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
|
TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
|
||||||
MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);
|
MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);
|
||||||
int codePage = 1252;
|
int codePage = 1252;
|
||||||
if (oemCP != null) {
|
if (oemCP != null) {
|
||||||
codePage = LittleEndian.getInt(oemCP.getData());
|
codePage = LittleEndian.getInt(oemCP.getData());
|
||||||
} else if (cpId != null) {
|
} else if (cpId != null) {
|
||||||
codePage = LittleEndian.getInt(cpId.getData());
|
codePage = LittleEndian.getInt(cpId.getData());
|
||||||
}
|
}
|
||||||
switch (codePage) {
|
switch (codePage) {
|
||||||
// see http://en.wikipedia.org/wiki/Code_page for more
|
// see http://en.wikipedia.org/wiki/Code_page for more
|
||||||
case 1252: return "Windows-1252";
|
case 1252: return "Windows-1252";
|
||||||
case 20127: return "US-ASCII";
|
case 20127: return "US-ASCII";
|
||||||
default: return "cp"+codePage;
|
default: return "cp"+codePage;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -28,40 +28,40 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
class HPBFFileHandler extends POIFSFileHandler {
|
class HPBFFileHandler extends POIFSFileHandler {
|
||||||
@Override
|
@Override
|
||||||
public void handleFile(InputStream stream, String path) throws Exception {
|
public void handleFile(InputStream stream, String path) throws Exception {
|
||||||
HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream));
|
HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream));
|
||||||
assertNotNull(pub.getEscherDelayStm());
|
assertNotNull(pub.getEscherDelayStm());
|
||||||
assertNotNull(pub.getMainContents());
|
assertNotNull(pub.getMainContents());
|
||||||
assertNotNull(pub.getQuillContents());
|
assertNotNull(pub.getQuillContents());
|
||||||
|
|
||||||
// writing is not yet implemented... handlePOIDocument(pub);
|
// writing is not yet implemented... handlePOIDocument(pub);
|
||||||
pub.close();
|
pub.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Override
|
@Override
|
||||||
@Test
|
@Test
|
||||||
void test() throws Exception {
|
void test() throws Exception {
|
||||||
File file = new File("test-data/publisher/SampleBrochure.pub");
|
File file = new File("test-data/publisher/SampleBrochure.pub");
|
||||||
|
|
||||||
InputStream stream = new FileInputStream(file);
|
InputStream stream = new FileInputStream(file);
|
||||||
try {
|
try {
|
||||||
handleFile(stream, file.getPath());
|
handleFile(stream, file.getPath());
|
||||||
} finally {
|
} finally {
|
||||||
stream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
handleExtracting(file);
|
handleExtracting(file);
|
||||||
|
|
||||||
stream = new FileInputStream(file);
|
stream = new FileInputStream(file);
|
||||||
try {
|
try {
|
||||||
try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) {
|
try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) {
|
||||||
assertNotNull(extractor.getText());
|
assertNotNull(extractor.getText());
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
stream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -66,30 +66,30 @@ class HPSFFileHandler extends POIFSFileHandler {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void handleFile(InputStream stream, String path) throws Exception {
|
public void handleFile(InputStream stream, String path) throws Exception {
|
||||||
POIFSFileSystem poifs = new POIFSFileSystem(stream);
|
POIFSFileSystem poifs = new POIFSFileSystem(stream);
|
||||||
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs);
|
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs);
|
||||||
DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation();
|
DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation();
|
||||||
SummaryInformation si = hpsf.getSummaryInformation();
|
SummaryInformation si = hpsf.getSummaryInformation();
|
||||||
boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
|
boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
|
||||||
boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME);
|
boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME);
|
||||||
|
|
||||||
assertEquals(hasDSI, dsi != null);
|
assertEquals(hasDSI, dsi != null);
|
||||||
assertEquals(hasSI, si != null);
|
assertEquals(hasSI, si != null);
|
||||||
|
|
||||||
handlePOIDocument(hpsf);
|
handlePOIDocument(hpsf);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException {
|
private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException {
|
||||||
DirectoryNode root = poifs.getRoot();
|
DirectoryNode root = poifs.getRoot();
|
||||||
if (!root.hasEntry(streamName)) {
|
if (!root.hasEntry(streamName)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
try (DocumentInputStream dis = root.createDocumentInputStream(streamName)) {
|
try (DocumentInputStream dis = root.createDocumentInputStream(streamName)) {
|
||||||
return PropertySet.isPropertySetStream(dis);
|
return PropertySet.isPropertySetStream(dis);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static File getTempFile() {
|
private static File getTempFile() {
|
||||||
File f = null;
|
File f = null;
|
||||||
try {
|
try {
|
||||||
f = TempFile.createTempFile("hpsfCopy", "out");
|
f = TempFile.createTempFile("hpsfCopy", "out");
|
||||||
|
@ -112,16 +112,16 @@ class HPSFFileHandler extends POIFSFileHandler {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Override
|
@Override
|
||||||
@Test
|
@Test
|
||||||
@SuppressWarnings("java:S2699")
|
@SuppressWarnings("java:S2699")
|
||||||
void test() throws Exception {
|
void test() throws Exception {
|
||||||
String path = "test-data/diagram/44501.vsd";
|
String path = "test-data/diagram/44501.vsd";
|
||||||
try (InputStream stream = new FileInputStream(path)) {
|
try (InputStream stream = new FileInputStream(path)) {
|
||||||
handleFile(stream, path);
|
handleFile(stream, path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -28,61 +28,61 @@ import org.apache.poi.hsmf.datatypes.DirectoryChunk;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
class HSMFFileHandler extends POIFSFileHandler {
|
class HSMFFileHandler extends POIFSFileHandler {
|
||||||
@Override
|
@Override
|
||||||
public void handleFile(InputStream stream, String path) throws Exception {
|
public void handleFile(InputStream stream, String path) throws Exception {
|
||||||
MAPIMessage mapi = new MAPIMessage(stream);
|
MAPIMessage mapi = new MAPIMessage(stream);
|
||||||
assertNotNull(mapi.getAttachmentFiles());
|
assertNotNull(mapi.getAttachmentFiles());
|
||||||
assertNotNull(mapi.getDisplayBCC());
|
assertNotNull(mapi.getDisplayBCC());
|
||||||
assertNotNull(mapi.getMessageDate());
|
assertNotNull(mapi.getMessageDate());
|
||||||
|
|
||||||
AttachmentChunks[] attachments = mapi.getAttachmentFiles();
|
AttachmentChunks[] attachments = mapi.getAttachmentFiles();
|
||||||
|
|
||||||
for(AttachmentChunks attachment : attachments) {
|
for(AttachmentChunks attachment : attachments) {
|
||||||
|
|
||||||
DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory();
|
DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory();
|
||||||
if(chunkDirectory != null) {
|
if(chunkDirectory != null) {
|
||||||
MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage();
|
MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage();
|
||||||
assertNotNull(attachmentMSG);
|
assertNotNull(attachmentMSG);
|
||||||
String body = attachmentMSG.getTextBody();
|
String body = attachmentMSG.getTextBody();
|
||||||
assertNotNull(body);
|
assertNotNull(body);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* => Writing isn't yet supported...
|
/* => Writing isn't yet supported...
|
||||||
// write out the file
|
// write out the file
|
||||||
File file = TempFile.createTempFile("StressTest", ".msg");
|
File file = TempFile.createTempFile("StressTest", ".msg");
|
||||||
writeToFile(mapi, file);
|
writeToFile(mapi, file);
|
||||||
|
|
||||||
MAPIMessage read = new MAPIMessage(file.getAbsolutePath());
|
MAPIMessage read = new MAPIMessage(file.getAbsolutePath());
|
||||||
assertNotNull(read.getAttachmentFiles());
|
assertNotNull(read.getAttachmentFiles());
|
||||||
assertNotNull(read.getDisplayBCC());
|
assertNotNull(read.getDisplayBCC());
|
||||||
assertNotNull(read.getMessageDate());
|
assertNotNull(read.getMessageDate());
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// writing is not yet supported... handlePOIDocument(mapi);
|
// writing is not yet supported... handlePOIDocument(mapi);
|
||||||
|
|
||||||
mapi.close();
|
mapi.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
// private void writeToFile(MAPIMessage mapi, File file)
|
// private void writeToFile(MAPIMessage mapi, File file)
|
||||||
// throws FileNotFoundException, IOException {
|
// throws FileNotFoundException, IOException {
|
||||||
// OutputStream stream = new FileOutputStream(file);
|
// OutputStream stream = new FileOutputStream(file);
|
||||||
// try {
|
// try {
|
||||||
// mapi.write(stream);
|
// mapi.write(stream);
|
||||||
// } finally {
|
// } finally {
|
||||||
// stream.close();
|
// stream.close();
|
||||||
// }
|
// }
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Override
|
@Override
|
||||||
@Test
|
@Test
|
||||||
void test() throws Exception {
|
void test() throws Exception {
|
||||||
File file = new File("test-data/hsmf/logsat.com_signatures_valid.msg");
|
File file = new File("test-data/hsmf/logsat.com_signatures_valid.msg");
|
||||||
try (InputStream stream = new FileInputStream(file)) {
|
try (InputStream stream = new FileInputStream(file)) {
|
||||||
handleFile(stream, file.getPath());
|
handleFile(stream, file.getPath());
|
||||||
}
|
}
|
||||||
|
|
||||||
handleExtracting(file);
|
handleExtracting(file);
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -37,90 +37,90 @@ import org.apache.commons.io.output.NullPrintStream;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
class HSSFFileHandler extends SpreadsheetHandler {
|
class HSSFFileHandler extends SpreadsheetHandler {
|
||||||
private final POIFSFileHandler delegate = new POIFSFileHandler();
|
private final POIFSFileHandler delegate = new POIFSFileHandler();
|
||||||
@Override
|
@Override
|
||||||
public void handleFile(InputStream stream, String path) throws Exception {
|
public void handleFile(InputStream stream, String path) throws Exception {
|
||||||
HSSFWorkbook wb = new HSSFWorkbook(stream);
|
HSSFWorkbook wb = new HSSFWorkbook(stream);
|
||||||
handleWorkbook(wb);
|
handleWorkbook(wb);
|
||||||
|
|
||||||
// TODO: some documents fail currently...
|
// TODO: some documents fail currently...
|
||||||
// Note - as of Bugzilla 48036 (svn r828244, r828247) POI is capable of evaluating
|
// Note - as of Bugzilla 48036 (svn r828244, r828247) POI is capable of evaluating
|
||||||
// IntersectionPtg. However it is still not capable of parsing it.
|
// IntersectionPtg. However it is still not capable of parsing it.
|
||||||
// So FormulaEvalTestData.xls now contains a few formulas that produce errors here.
|
// So FormulaEvalTestData.xls now contains a few formulas that produce errors here.
|
||||||
//HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb);
|
//HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb);
|
||||||
//evaluator.evaluateAll();
|
//evaluator.evaluateAll();
|
||||||
|
|
||||||
delegate.handlePOIDocument(wb);
|
delegate.handlePOIDocument(wb);
|
||||||
|
|
||||||
// also try to see if some of the Records behave incorrectly
|
// also try to see if some of the Records behave incorrectly
|
||||||
// TODO: still fails on some records... RecordsStresser.handleWorkbook(wb);
|
// TODO: still fails on some records... RecordsStresser.handleWorkbook(wb);
|
||||||
|
|
||||||
HSSFOptimiser.optimiseCellStyles(wb);
|
HSSFOptimiser.optimiseCellStyles(wb);
|
||||||
for(Sheet sheet : wb) {
|
for(Sheet sheet : wb) {
|
||||||
for (Row row : sheet) {
|
for (Row row : sheet) {
|
||||||
for (Cell cell : row) {
|
for (Cell cell : row) {
|
||||||
assertNotNull(cell.getCellStyle());
|
assertNotNull(cell.getCellStyle());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
HSSFOptimiser.optimiseFonts(wb);
|
HSSFOptimiser.optimiseFonts(wb);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>();
|
private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>();
|
||||||
static {
|
static {
|
||||||
// encrypted
|
// encrypted
|
||||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/35897-type4.xls");
|
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/35897-type4.xls");
|
||||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/xor-encryption-abc.xls");
|
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/xor-encryption-abc.xls");
|
||||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/password.xls");
|
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/password.xls");
|
||||||
// broken files
|
// broken files
|
||||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/43493.xls");
|
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/43493.xls");
|
||||||
// TODO: ok to ignore?
|
// TODO: ok to ignore?
|
||||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/50833.xls");
|
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/50833.xls");
|
||||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/51832.xls");
|
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/51832.xls");
|
||||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/XRefCalc.xls");
|
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/XRefCalc.xls");
|
||||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/61300.xls");
|
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/61300.xls");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void handleAdditional(File file) throws Exception {
|
public void handleAdditional(File file) throws Exception {
|
||||||
// redirect stdout as the examples often write lots of text
|
// redirect stdout as the examples often write lots of text
|
||||||
PrintStream oldOut = System.out;
|
PrintStream oldOut = System.out;
|
||||||
String fileWithParent = file.getParentFile().getName() + "/" + file.getName();
|
String fileWithParent = file.getParentFile().getName() + "/" + file.getName();
|
||||||
try {
|
try {
|
||||||
System.setOut(new NullPrintStream());
|
System.setOut(new NullPrintStream());
|
||||||
|
|
||||||
BiffViewer.main(new String[]{file.getAbsolutePath()});
|
BiffViewer.main(new String[]{file.getAbsolutePath()});
|
||||||
|
|
||||||
assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" );
|
assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" );
|
||||||
} catch (OldExcelFormatException e) {
|
} catch (OldExcelFormatException e) {
|
||||||
// old excel formats are not supported here
|
// old excel formats are not supported here
|
||||||
} catch (RuntimeException e) {
|
} catch (RuntimeException e) {
|
||||||
if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) {
|
if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
System.setOut(oldOut);
|
System.setOut(oldOut);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Test
|
@Test
|
||||||
void test() throws Exception {
|
void test() throws Exception {
|
||||||
File file = new File("../test-data/spreadsheet/59074.xls");
|
File file = new File("../test-data/spreadsheet/59074.xls");
|
||||||
|
|
||||||
try (InputStream stream = new FileInputStream(file)) {
|
try (InputStream stream = new FileInputStream(file)) {
|
||||||
handleFile(stream, file.getPath());
|
handleFile(stream, file.getPath());
|
||||||
}
|
}
|
||||||
|
|
||||||
handleExtracting(file);
|
handleExtracting(file);
|
||||||
|
|
||||||
handleAdditional(file);
|
handleAdditional(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Test
|
@Test
|
||||||
@SuppressWarnings("java:S2699")
|
@SuppressWarnings("java:S2699")
|
||||||
void testExtractor() throws Exception {
|
void testExtractor() throws Exception {
|
||||||
handleExtracting(new File("../test-data/spreadsheet/59074.xls"));
|
handleExtracting(new File("../test-data/spreadsheet/59074.xls"));
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.poi.xwpf.usermodel.XWPFRelation;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
class OPCFileHandler extends AbstractFileHandler {
|
class OPCFileHandler extends AbstractFileHandler {
|
||||||
@Override
|
@Override
|
||||||
public void handleFile(InputStream stream, String path) throws Exception {
|
public void handleFile(InputStream stream, String path) throws Exception {
|
||||||
// ignore password protected files
|
// ignore password protected files
|
||||||
if (POIXMLDocumentHandler.isEncrypted(stream)) return;
|
if (POIXMLDocumentHandler.isEncrypted(stream)) return;
|
||||||
|
@ -59,15 +59,15 @@ class OPCFileHandler extends AbstractFileHandler {
|
||||||
// text-extraction is not possible currently for these types of files
|
// text-extraction is not possible currently for these types of files
|
||||||
}
|
}
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Test
|
@Test
|
||||||
void test() throws Exception {
|
void test() throws Exception {
|
||||||
File file = new File("test-data/diagram/test.vsdx");
|
File file = new File("test-data/diagram/test.vsdx");
|
||||||
|
|
||||||
try (InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000)) {
|
try (InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000)) {
|
||||||
handleFile(stream, file.getPath());
|
handleFile(stream, file.getPath());
|
||||||
}
|
}
|
||||||
|
|
||||||
handleExtracting(file);
|
handleExtracting(file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,15 +31,15 @@ import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
class POIFSFileHandler extends AbstractFileHandler {
|
class POIFSFileHandler extends AbstractFileHandler {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void handleFile(InputStream stream, String path) throws Exception {
|
public void handleFile(InputStream stream, String path) throws Exception {
|
||||||
try (POIFSFileSystem fs = new POIFSFileSystem(stream)) {
|
try (POIFSFileSystem fs = new POIFSFileSystem(stream)) {
|
||||||
handlePOIFSFileSystem(fs);
|
handlePOIFSFileSystem(fs);
|
||||||
handleHPSFProperties(fs);
|
handleHPSFProperties(fs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleHPSFProperties(POIFSFileSystem fs) throws IOException {
|
private void handleHPSFProperties(POIFSFileSystem fs) throws IOException {
|
||||||
try (HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs)) {
|
try (HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs)) {
|
||||||
// can be null
|
// can be null
|
||||||
ext.getDocSummaryInformation();
|
ext.getDocSummaryInformation();
|
||||||
|
@ -52,12 +52,12 @@ class POIFSFileHandler extends AbstractFileHandler {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handlePOIFSFileSystem(POIFSFileSystem fs) {
|
private void handlePOIFSFileSystem(POIFSFileSystem fs) {
|
||||||
assertNotNull(fs);
|
assertNotNull(fs);
|
||||||
assertNotNull(fs.getRoot());
|
assertNotNull(fs.getRoot());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void handlePOIDocument(POIDocument doc) throws Exception {
|
protected void handlePOIDocument(POIDocument doc) throws Exception {
|
||||||
try (UnsynchronizedByteArrayOutputStream out = new UnsynchronizedByteArrayOutputStream()) {
|
try (UnsynchronizedByteArrayOutputStream out = new UnsynchronizedByteArrayOutputStream()) {
|
||||||
doc.write(out);
|
doc.write(out);
|
||||||
|
|
||||||
try (InputStream in = out.toInputStream();
|
try (InputStream in = out.toInputStream();
|
||||||
|
@ -65,7 +65,7 @@ class POIFSFileHandler extends AbstractFileHandler {
|
||||||
handlePOIFSFileSystem(fs);
|
handlePOIFSFileSystem(fs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -29,13 +29,13 @@ import org.apache.xmlbeans.XmlCursor;
|
||||||
import org.apache.xmlbeans.XmlObject;
|
import org.apache.xmlbeans.XmlObject;
|
||||||
|
|
||||||
public final class POIXMLDocumentHandler {
|
public final class POIXMLDocumentHandler {
|
||||||
protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception {
|
protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception {
|
||||||
assertNotNull(doc.getAllEmbeddedParts());
|
assertNotNull(doc.getAllEmbeddedParts());
|
||||||
assertNotNull(doc.getPackage());
|
assertNotNull(doc.getPackage());
|
||||||
assertNotNull(doc.getPackagePart());
|
assertNotNull(doc.getPackagePart());
|
||||||
assertNotNull(doc.getProperties());
|
assertNotNull(doc.getProperties());
|
||||||
assertNotNull(doc.getRelations());
|
assertNotNull(doc.getRelations());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static boolean isEncrypted(InputStream stream) throws IOException {
|
protected static boolean isEncrypted(InputStream stream) throws IOException {
|
||||||
if (FileMagic.valueOf(stream) == FileMagic.OLE2) {
|
if (FileMagic.valueOf(stream) == FileMagic.OLE2) {
|
||||||
|
|
|
@ -33,68 +33,68 @@ import org.apache.poi.util.RecordFormatException;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFChartSheet;
|
import org.apache.poi.xssf.usermodel.XSSFChartSheet;
|
||||||
|
|
||||||
public abstract class SpreadsheetHandler extends AbstractFileHandler {
|
public abstract class SpreadsheetHandler extends AbstractFileHandler {
|
||||||
public void handleWorkbook(Workbook wb) throws IOException {
|
public void handleWorkbook(Workbook wb) throws IOException {
|
||||||
// try to access some of the content
|
// try to access some of the content
|
||||||
readContent(wb);
|
readContent(wb);
|
||||||
|
|
||||||
// write out the file
|
// write out the file
|
||||||
writeToArray(wb);
|
writeToArray(wb);
|
||||||
|
|
||||||
// access some more content (we had cases where writing corrupts the data in memory)
|
// access some more content (we had cases where writing corrupts the data in memory)
|
||||||
readContent(wb);
|
readContent(wb);
|
||||||
|
|
||||||
// write once more
|
// write once more
|
||||||
UnsynchronizedByteArrayOutputStream out = writeToArray(wb);
|
UnsynchronizedByteArrayOutputStream out = writeToArray(wb);
|
||||||
|
|
||||||
// read in the written file
|
// read in the written file
|
||||||
Workbook read = WorkbookFactory.create(out.toInputStream());
|
Workbook read = WorkbookFactory.create(out.toInputStream());
|
||||||
|
|
||||||
assertNotNull(read);
|
assertNotNull(read);
|
||||||
|
|
||||||
readContent(read);
|
readContent(read);
|
||||||
|
|
||||||
extractEmbedded(read);
|
extractEmbedded(read);
|
||||||
|
|
||||||
modifyContent(read);
|
modifyContent(read);
|
||||||
|
|
||||||
read.close();
|
read.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
private UnsynchronizedByteArrayOutputStream writeToArray(Workbook wb) throws IOException {
|
private UnsynchronizedByteArrayOutputStream writeToArray(Workbook wb) throws IOException {
|
||||||
UnsynchronizedByteArrayOutputStream stream = new UnsynchronizedByteArrayOutputStream();
|
UnsynchronizedByteArrayOutputStream stream = new UnsynchronizedByteArrayOutputStream();
|
||||||
wb.write(stream);
|
wb.write(stream);
|
||||||
return stream;
|
return stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void readContent(Workbook wb) {
|
private void readContent(Workbook wb) {
|
||||||
for(int i = 0;i < wb.getNumberOfSheets();i++) {
|
for(int i = 0;i < wb.getNumberOfSheets();i++) {
|
||||||
Sheet sheet = wb.getSheetAt(i);
|
Sheet sheet = wb.getSheetAt(i);
|
||||||
assertNotNull(wb.getSheet(sheet.getSheetName()));
|
assertNotNull(wb.getSheet(sheet.getSheetName()));
|
||||||
sheet.groupColumn((short) 4, (short) 5);
|
sheet.groupColumn((short) 4, (short) 5);
|
||||||
sheet.setColumnGroupCollapsed(4, true);
|
sheet.setColumnGroupCollapsed(4, true);
|
||||||
sheet.setColumnGroupCollapsed(4, false);
|
sheet.setColumnGroupCollapsed(4, false);
|
||||||
|
|
||||||
// don't do this for very large sheets as it will take a long time
|
// don't do this for very large sheets as it will take a long time
|
||||||
if(sheet.getPhysicalNumberOfRows() > 1000) {
|
if(sheet.getPhysicalNumberOfRows() > 1000) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for(Row row : sheet) {
|
for(Row row : sheet) {
|
||||||
for(Cell cell : row) {
|
for(Cell cell : row) {
|
||||||
assertNotNull(cell.toString());
|
assertNotNull(cell.toString());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (Name name : wb.getAllNames()) {
|
for (Name name : wb.getAllNames()) {
|
||||||
// this sometimes caused exceptions
|
// this sometimes caused exceptions
|
||||||
if(!name.isFunctionName()) {
|
if(!name.isFunctionName()) {
|
||||||
name.getRefersToFormula();
|
name.getRefersToFormula();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void extractEmbedded(Workbook wb) throws IOException {
|
private void extractEmbedded(Workbook wb) throws IOException {
|
||||||
EmbeddedExtractor ee = new EmbeddedExtractor();
|
EmbeddedExtractor ee = new EmbeddedExtractor();
|
||||||
|
|
||||||
for (Sheet s : wb) {
|
for (Sheet s : wb) {
|
||||||
|
@ -104,48 +104,48 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler {
|
||||||
assertNotNull(ed.getShape());
|
assertNotNull(ed.getShape());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void modifyContent(Workbook wb) {
|
private void modifyContent(Workbook wb) {
|
||||||
/* a number of file fail because of various things: udf, unimplemented functions, ...
|
/* a number of file fail because of various things: udf, unimplemented functions, ...
|
||||||
we would need quite a list of excludes and the large regression tests would probably
|
we would need quite a list of excludes and the large regression tests would probably
|
||||||
take a lot longer to run...
|
take a lot longer to run...
|
||||||
try {
|
try {
|
||||||
// try to re-compute all formulas to find cases where parsing fails
|
// try to re-compute all formulas to find cases where parsing fails
|
||||||
wb.getCreationHelper().createFormulaEvaluator().evaluateAll();
|
wb.getCreationHelper().createFormulaEvaluator().evaluateAll();
|
||||||
} catch (RuntimeException e) {
|
} catch (RuntimeException e) {
|
||||||
// only allow a specific exception which indicates that an external
|
// only allow a specific exception which indicates that an external
|
||||||
// reference was not found
|
// reference was not found
|
||||||
if(!e.getMessage().contains("Could not resolve external workbook name")) {
|
if(!e.getMessage().contains("Could not resolve external workbook name")) {
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
for (int i=wb.getNumberOfSheets()-1; i>=0; i--) {
|
for (int i=wb.getNumberOfSheets()-1; i>=0; i--) {
|
||||||
if(wb.getSheetAt(i) instanceof XSSFChartSheet) {
|
if(wb.getSheetAt(i) instanceof XSSFChartSheet) {
|
||||||
// clone for chart-sheets is not supported
|
// clone for chart-sheets is not supported
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
wb.cloneSheet(i);
|
wb.cloneSheet(i);
|
||||||
} catch (RecordFormatException e) {
|
} catch (RecordFormatException e) {
|
||||||
if (e.getCause() instanceof CloneNotSupportedException) {
|
if (e.getCause() instanceof CloneNotSupportedException) {
|
||||||
// ignore me
|
// ignore me
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
throw e;
|
|
||||||
} catch (RuntimeException e) {
|
|
||||||
if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) ||
|
|
||||||
"CountryRecord not found".equals(e.getMessage()) ||
|
|
||||||
"CountryRecord or SSTRecord not found".equals(e.getMessage()) ||
|
|
||||||
"Cannot add more than 65535 shapes".equals(e.getMessage()) ) {
|
|
||||||
// ignore these here for now
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
throw e;
|
throw e;
|
||||||
}
|
} catch (RuntimeException e) {
|
||||||
}
|
if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) ||
|
||||||
}
|
"CountryRecord not found".equals(e.getMessage()) ||
|
||||||
|
"CountryRecord or SSTRecord not found".equals(e.getMessage()) ||
|
||||||
|
"Cannot add more than 65535 shapes".equals(e.getMessage()) ) {
|
||||||
|
// ignore these here for now
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -31,55 +31,55 @@ import org.apache.poi.xslf.usermodel.XSLFSlideShow;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
class XSLFFileHandler extends SlideShowHandler {
|
class XSLFFileHandler extends SlideShowHandler {
|
||||||
@Override
|
@Override
|
||||||
public void handleFile(InputStream stream, String path) throws Exception {
|
public void handleFile(InputStream stream, String path) throws Exception {
|
||||||
try (XMLSlideShow slide = new XMLSlideShow(stream);
|
try (XMLSlideShow slide = new XMLSlideShow(stream);
|
||||||
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) {
|
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) {
|
||||||
;
|
;
|
||||||
assertNotNull(slideInner.getPresentation());
|
assertNotNull(slideInner.getPresentation());
|
||||||
assertNotNull(slideInner.getSlideMasterReferences());
|
assertNotNull(slideInner.getSlideMasterReferences());
|
||||||
assertNotNull(slideInner.getSlideReferences());
|
assertNotNull(slideInner.getSlideReferences());
|
||||||
|
|
||||||
new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
|
new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
|
||||||
|
|
||||||
handleSlideShow(slide);
|
handleSlideShow(slide);
|
||||||
} catch (POIXMLException e) {
|
} catch (POIXMLException e) {
|
||||||
Exception cause = (Exception)e.getCause();
|
Exception cause = (Exception)e.getCause();
|
||||||
throw cause == null ? e : cause;
|
throw cause == null ? e : cause;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void handleExtracting(File file) throws Exception {
|
public void handleExtracting(File file) throws Exception {
|
||||||
super.handleExtracting(file);
|
super.handleExtracting(file);
|
||||||
|
|
||||||
|
|
||||||
// additionally try the other getText() methods
|
// additionally try the other getText() methods
|
||||||
try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) {
|
try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) {
|
||||||
assertNotNull(extractor);
|
assertNotNull(extractor);
|
||||||
extractor.setSlidesByDefault(true);
|
extractor.setSlidesByDefault(true);
|
||||||
extractor.setNotesByDefault(true);
|
extractor.setNotesByDefault(true);
|
||||||
extractor.setMasterByDefault(true);
|
extractor.setMasterByDefault(true);
|
||||||
|
|
||||||
assertNotNull(extractor.getText());
|
assertNotNull(extractor.getText());
|
||||||
|
|
||||||
extractor.setSlidesByDefault(false);
|
extractor.setSlidesByDefault(false);
|
||||||
extractor.setNotesByDefault(false);
|
extractor.setNotesByDefault(false);
|
||||||
extractor.setMasterByDefault(false);
|
extractor.setMasterByDefault(false);
|
||||||
|
|
||||||
assertEquals("", extractor.getText(), "With all options disabled we should not get text");
|
assertEquals("", extractor.getText(), "With all options disabled we should not get text");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// a test-case to test this locally without executing the full TestAllFiles
|
// a test-case to test this locally without executing the full TestAllFiles
|
||||||
@Override
|
@Override
|
||||||
@Test
|
@Test
|
||||||
void test() throws Exception {
|
void test() throws Exception {
|
||||||
File file = new File("test-data/slideshow/ca.ubc.cs.people_~emhill_presentations_HowWeRefactor.pptx");
|
File file = new File("test-data/slideshow/ca.ubc.cs.people_~emhill_presentations_HowWeRefactor.pptx");
|
||||||
try (InputStream stream = new FileInputStream(file)) {
|
try (InputStream stream = new FileInputStream(file)) {
|
||||||
handleFile(stream, file.getPath());
|
handleFile(stream, file.getPath());
|
||||||
}
|
}
|
||||||
|
|
||||||
handleExtracting(file);
|
handleExtracting(file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue