mirror of https://github.com/apache/poi.git
convert tabs to spaces
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1890125 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
212a7b9655
commit
0eb475ee3a
|
@ -46,7 +46,7 @@ public abstract class AbstractFileHandler implements FileHandler {
|
|||
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<>();
|
||||
static {
|
||||
// password protected files without password
|
||||
// ... currently none ...
|
||||
// ... currently none ...
|
||||
|
||||
// unsupported file-types, no supported OLE2 parts
|
||||
EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat");
|
||||
|
@ -75,9 +75,9 @@ public abstract class AbstractFileHandler implements FileHandler {
|
|||
/* Did fail for some documents with special XML contents...
|
||||
try {
|
||||
OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(),
|
||||
"/tmp/pretty-" + file.getName() });
|
||||
"/tmp/pretty-" + file.getName() });
|
||||
} catch (ZipException e) {
|
||||
// ignore, not a Zip/OOXML file
|
||||
// ignore, not a Zip/OOXML file
|
||||
}*/
|
||||
}
|
||||
|
||||
|
|
|
@ -38,128 +38,128 @@ import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
|||
*
|
||||
*/
|
||||
public class BaseIntegrationTest {
|
||||
private final File rootDir;
|
||||
private final String file;
|
||||
private FileHandler handler;
|
||||
private final File rootDir;
|
||||
private final String file;
|
||||
private FileHandler handler;
|
||||
|
||||
public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
|
||||
this.rootDir = rootDir;
|
||||
this.file = file;
|
||||
this.handler = handler;
|
||||
}
|
||||
public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
|
||||
this.rootDir = rootDir;
|
||||
this.file = file;
|
||||
this.handler = handler;
|
||||
}
|
||||
|
||||
/**
|
||||
* Keep this public so it can be used by the regression-tests
|
||||
*/
|
||||
public void test() throws Exception {
|
||||
/**
|
||||
* Keep this public so it can be used by the regression-tests
|
||||
*/
|
||||
public void test() throws Exception {
|
||||
assertNotNull( handler, "Unknown file extension for file: " + file );
|
||||
testOneFile(new File(rootDir, file));
|
||||
}
|
||||
testOneFile(new File(rootDir, file));
|
||||
}
|
||||
|
||||
protected void testOneFile(File inputFile) throws Exception {
|
||||
try {
|
||||
handleFile(inputFile);
|
||||
} catch (OfficeXmlFileException e) {
|
||||
// switch XWPF and HWPF and so forth depending on the error message
|
||||
handleWrongOLE2XMLExtension(inputFile, e);
|
||||
} catch (OldFileFormatException e) {
|
||||
// Not even text extraction is supported for these: handler.handleExtracting(inputFile);
|
||||
assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" );
|
||||
} catch (EncryptedDocumentException e) {
|
||||
// Do not try to read encrypted files
|
||||
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
|
||||
} catch (ZipException e) {
|
||||
// some files are corrupted
|
||||
if (e.getMessage().equals("unexpected EOF") || e.getMessage().equals("Truncated ZIP file")) {
|
||||
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
|
||||
}
|
||||
protected void testOneFile(File inputFile) throws Exception {
|
||||
try {
|
||||
handleFile(inputFile);
|
||||
} catch (OfficeXmlFileException e) {
|
||||
// switch XWPF and HWPF and so forth depending on the error message
|
||||
handleWrongOLE2XMLExtension(inputFile, e);
|
||||
} catch (OldFileFormatException e) {
|
||||
// Not even text extraction is supported for these: handler.handleExtracting(inputFile);
|
||||
assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" );
|
||||
} catch (EncryptedDocumentException e) {
|
||||
// Do not try to read encrypted files
|
||||
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
|
||||
} catch (ZipException e) {
|
||||
// some files are corrupted
|
||||
if (e.getMessage().equals("unexpected EOF") || e.getMessage().equals("Truncated ZIP file")) {
|
||||
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
|
||||
}
|
||||
|
||||
throw e;
|
||||
} catch (IOException e) {
|
||||
// ignore some other ways of corrupted files
|
||||
String message = e.getMessage();
|
||||
if(message != null && message.contains("Truncated ZIP file")) {
|
||||
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
|
||||
}
|
||||
throw e;
|
||||
} catch (IOException e) {
|
||||
// ignore some other ways of corrupted files
|
||||
String message = e.getMessage();
|
||||
if(message != null && message.contains("Truncated ZIP file")) {
|
||||
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
|
||||
}
|
||||
|
||||
// sometimes binary format has XML-format-extension...
|
||||
if(message != null && message.contains("rong file format or file extension for OO XML file")) {
|
||||
handleWrongOLE2XMLExtension(inputFile, e);
|
||||
return;
|
||||
}
|
||||
// sometimes binary format has XML-format-extension...
|
||||
if(message != null && message.contains("rong file format or file extension for OO XML file")) {
|
||||
handleWrongOLE2XMLExtension(inputFile, e);
|
||||
return;
|
||||
}
|
||||
|
||||
throw e;
|
||||
} catch (IllegalArgumentException e) {
|
||||
// ignore errors for documents with incorrect extension
|
||||
String message = e.getMessage();
|
||||
if(message != null && (message.equals("The document is really a RTF file") ||
|
||||
message.equals("The document is really a PDF file") ||
|
||||
message.equals("The document is really a HTML file"))) {
|
||||
assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
|
||||
}
|
||||
throw e;
|
||||
} catch (IllegalArgumentException e) {
|
||||
// ignore errors for documents with incorrect extension
|
||||
String message = e.getMessage();
|
||||
if(message != null && (message.equals("The document is really a RTF file") ||
|
||||
message.equals("The document is really a PDF file") ||
|
||||
message.equals("The document is really a HTML file"))) {
|
||||
assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
|
||||
}
|
||||
|
||||
if(message != null && message.equals("The document is really a OOXML file")) {
|
||||
handleWrongOLE2XMLExtension(inputFile, e);
|
||||
return;
|
||||
}
|
||||
if(message != null && message.equals("The document is really a OOXML file")) {
|
||||
handleWrongOLE2XMLExtension(inputFile, e);
|
||||
return;
|
||||
}
|
||||
|
||||
throw e;
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
|
||||
try {
|
||||
handler.handleExtracting(inputFile);
|
||||
} catch (EncryptedDocumentException e) {
|
||||
// Do not try to read encrypted files
|
||||
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
|
||||
}
|
||||
}
|
||||
try {
|
||||
handler.handleExtracting(inputFile);
|
||||
} catch (EncryptedDocumentException e) {
|
||||
// Do not try to read encrypted files
|
||||
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
|
||||
}
|
||||
}
|
||||
|
||||
void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception {
|
||||
// we sometimes have wrong extensions, so for some exceptions we try to handle it
|
||||
// with the correct FileHandler instead
|
||||
String message = e.getMessage();
|
||||
// we sometimes have wrong extensions, so for some exceptions we try to handle it
|
||||
// with the correct FileHandler instead
|
||||
String message = e.getMessage();
|
||||
|
||||
// ignore some file-types that we do not want to handle here
|
||||
assumeFalse( message != null && (message.equals("The document is really a RTF file") ||
|
||||
message.equals("The document is really a PDF file") ||
|
||||
message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
|
||||
// ignore some file-types that we do not want to handle here
|
||||
assumeFalse( message != null && (message.equals("The document is really a RTF file") ||
|
||||
message.equals("The document is really a PDF file") ||
|
||||
message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
|
||||
|
||||
if(message != null && (message.equals("The document is really a XLS file"))) {
|
||||
handler = new HSSFFileHandler();
|
||||
} else if(message != null && (message.equals("The document is really a PPT file"))) {
|
||||
handler = new HSLFFileHandler();
|
||||
} else if(message != null && (message.equals("The document is really a DOC file"))) {
|
||||
handler = new HWPFFileHandler();
|
||||
} else if(message != null && (message.equals("The document is really a VSD file"))) {
|
||||
handler = new HDGFFileHandler();
|
||||
if(message != null && (message.equals("The document is really a XLS file"))) {
|
||||
handler = new HSSFFileHandler();
|
||||
} else if(message != null && (message.equals("The document is really a PPT file"))) {
|
||||
handler = new HSLFFileHandler();
|
||||
} else if(message != null && (message.equals("The document is really a DOC file"))) {
|
||||
handler = new HWPFFileHandler();
|
||||
} else if(message != null && (message.equals("The document is really a VSD file"))) {
|
||||
handler = new HDGFFileHandler();
|
||||
|
||||
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
|
||||
} else if (handler instanceof HWPFFileHandler) {
|
||||
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
|
||||
} else if (handler instanceof HWPFFileHandler) {
|
||||
handler = new XWPFFileHandler();
|
||||
} else if (handler instanceof HSSFFileHandler) {
|
||||
handler = new XSSFFileHandler();
|
||||
} else if (handler instanceof HSLFFileHandler) {
|
||||
handler = new XSLFFileHandler();
|
||||
handler = new XSLFFileHandler();
|
||||
|
||||
// and the other way around, use HWPF instead of XWPF and so forth
|
||||
} else if(handler instanceof XWPFFileHandler) {
|
||||
handler = new HWPFFileHandler();
|
||||
} else if(handler instanceof XSSFFileHandler) {
|
||||
handler = new HSSFFileHandler();
|
||||
} else if(handler instanceof XSLFFileHandler) {
|
||||
handler = new HSLFFileHandler();
|
||||
// and the other way around, use HWPF instead of XWPF and so forth
|
||||
} else if(handler instanceof XWPFFileHandler) {
|
||||
handler = new HWPFFileHandler();
|
||||
} else if(handler instanceof XSSFFileHandler) {
|
||||
handler = new HSSFFileHandler();
|
||||
} else if(handler instanceof XSLFFileHandler) {
|
||||
handler = new HSLFFileHandler();
|
||||
} else {
|
||||
// nothing matched => throw the exception to the outside
|
||||
throw e;
|
||||
}
|
||||
// nothing matched => throw the exception to the outside
|
||||
throw e;
|
||||
}
|
||||
|
||||
// we found a different handler to try processing again
|
||||
handleFile(inputFile);
|
||||
}
|
||||
// we found a different handler to try processing again
|
||||
handleFile(inputFile);
|
||||
}
|
||||
|
||||
private void handleFile(File inputFile) throws Exception {
|
||||
try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
|
||||
handler.handleFile(newStream, inputFile.getAbsolutePath());
|
||||
}
|
||||
}
|
||||
private void handleFile(File inputFile) throws Exception {
|
||||
try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
|
||||
handler.handleFile(newStream, inputFile.getAbsolutePath());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,28 +24,28 @@ import java.io.InputStream;
|
|||
* used in the stress testing.
|
||||
*/
|
||||
public interface FileHandler {
|
||||
/**
|
||||
* The FileHandler receives a stream ready for reading the
|
||||
* file and should handle the content that is provided and
|
||||
* try to read and interpret the data.
|
||||
*
|
||||
* Closing is handled by the framework outside this call.
|
||||
*
|
||||
* @param stream The input stream to read the file from.
|
||||
* @param path the relative path to the file
|
||||
* @throws Exception If an error happens in the file-specific handler
|
||||
*/
|
||||
void handleFile(InputStream stream, String path) throws Exception;
|
||||
/**
|
||||
* The FileHandler receives a stream ready for reading the
|
||||
* file and should handle the content that is provided and
|
||||
* try to read and interpret the data.
|
||||
*
|
||||
* Closing is handled by the framework outside this call.
|
||||
*
|
||||
* @param stream The input stream to read the file from.
|
||||
* @param path the relative path to the file
|
||||
* @throws Exception If an error happens in the file-specific handler
|
||||
*/
|
||||
void handleFile(InputStream stream, String path) throws Exception;
|
||||
|
||||
/**
|
||||
* Ensures that extracting text from the given file
|
||||
* is returning some text.
|
||||
*/
|
||||
void handleExtracting(File file) throws Exception;
|
||||
/**
|
||||
* Ensures that extracting text from the given file
|
||||
* is returning some text.
|
||||
*/
|
||||
void handleExtracting(File file) throws Exception;
|
||||
|
||||
/**
|
||||
* Allows to perform some additional work, e.g. run
|
||||
* some of the example applications
|
||||
*/
|
||||
void handleAdditional(File file) throws Exception;
|
||||
/**
|
||||
* Allows to perform some additional work, e.g. run
|
||||
* some of the example applications
|
||||
*/
|
||||
void handleAdditional(File file) throws Exception;
|
||||
}
|
||||
|
|
|
@ -32,47 +32,47 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
|||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class HDGFFileHandler extends POIFSFileHandler {
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws IOException {
|
||||
POIFSFileSystem poifs = new POIFSFileSystem(stream);
|
||||
HDGFDiagram diagram = new HDGFDiagram(poifs);
|
||||
Stream[] topLevelStreams = diagram.getTopLevelStreams();
|
||||
assertNotNull(topLevelStreams);
|
||||
for(Stream str : topLevelStreams) {
|
||||
assertTrue(str.getPointer().getLength() >= 0);
|
||||
}
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws IOException {
|
||||
POIFSFileSystem poifs = new POIFSFileSystem(stream);
|
||||
HDGFDiagram diagram = new HDGFDiagram(poifs);
|
||||
Stream[] topLevelStreams = diagram.getTopLevelStreams();
|
||||
assertNotNull(topLevelStreams);
|
||||
for(Stream str : topLevelStreams) {
|
||||
assertTrue(str.getPointer().getLength() >= 0);
|
||||
}
|
||||
|
||||
TrailerStream trailerStream = diagram.getTrailerStream();
|
||||
assertNotNull(trailerStream);
|
||||
assertTrue(trailerStream.getPointer().getLength() >= 0);
|
||||
diagram.close();
|
||||
poifs.close();
|
||||
TrailerStream trailerStream = diagram.getTrailerStream();
|
||||
assertNotNull(trailerStream);
|
||||
assertTrue(trailerStream.getPointer().getLength() >= 0);
|
||||
diagram.close();
|
||||
poifs.close();
|
||||
|
||||
// writing is not yet implemented... handlePOIDocument(diagram);
|
||||
}
|
||||
// writing is not yet implemented... handlePOIDocument(diagram);
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Override
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Override
|
||||
@Test
|
||||
void test() throws Exception {
|
||||
void test() throws Exception {
|
||||
File file = new File("test-data/diagram/44501.vsd");
|
||||
|
||||
InputStream stream = new FileInputStream(file);
|
||||
try {
|
||||
handleFile(stream, file.getPath());
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
try {
|
||||
handleFile(stream, file.getPath());
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
|
||||
handleExtracting(file);
|
||||
handleExtracting(file);
|
||||
|
||||
stream = new FileInputStream(file);
|
||||
try {
|
||||
try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) {
|
||||
assertNotNull(extractor.getText());
|
||||
}
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
stream = new FileInputStream(file);
|
||||
try {
|
||||
try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) {
|
||||
assertNotNull(extractor.getText());
|
||||
}
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,58 +34,58 @@ import org.junit.jupiter.api.Test;
|
|||
|
||||
class HMEFFileHandler extends AbstractFileHandler {
|
||||
|
||||
@Override
|
||||
public void handleExtracting(File file) throws Exception {
|
||||
FileMagic fm = FileMagic.valueOf(file);
|
||||
if (fm == FileMagic.OLE2) {
|
||||
super.handleExtracting(file);
|
||||
}
|
||||
}
|
||||
@Override
|
||||
public void handleExtracting(File file) throws Exception {
|
||||
FileMagic fm = FileMagic.valueOf(file);
|
||||
if (fm == FileMagic.OLE2) {
|
||||
super.handleExtracting(file);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
HMEFMessage msg = new HMEFMessage(stream);
|
||||
HMEFMessage msg = new HMEFMessage(stream);
|
||||
|
||||
// there are test-files that have no body...
|
||||
String[] HTML_BODY = {
|
||||
"Testing TNEF Message", "TNEF test message with attachments", "Test"
|
||||
};
|
||||
String bodyStr;
|
||||
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) {
|
||||
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
|
||||
assertNotNull(bodyHtml);
|
||||
bodyStr = new String(bodyHtml.getData(), getEncoding(msg));
|
||||
} else {
|
||||
bodyStr = msg.getBody();
|
||||
}
|
||||
assertNotNull( bodyStr, "Body is not set" );
|
||||
assertNotNull( msg.getSubject(), "Subject is not set" );
|
||||
}
|
||||
// there are test-files that have no body...
|
||||
String[] HTML_BODY = {
|
||||
"Testing TNEF Message", "TNEF test message with attachments", "Test"
|
||||
};
|
||||
String bodyStr;
|
||||
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) {
|
||||
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
|
||||
assertNotNull(bodyHtml);
|
||||
bodyStr = new String(bodyHtml.getData(), getEncoding(msg));
|
||||
} else {
|
||||
bodyStr = msg.getBody();
|
||||
}
|
||||
assertNotNull( bodyStr, "Body is not set" );
|
||||
assertNotNull( msg.getSubject(), "Subject is not set" );
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Test
|
||||
void test() throws Exception {
|
||||
String path = "test-data/hmef/quick-winmail.dat";
|
||||
try (InputStream stream = new FileInputStream(path)) {
|
||||
handleFile(stream, path);
|
||||
}
|
||||
}
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Test
|
||||
void test() throws Exception {
|
||||
String path = "test-data/hmef/quick-winmail.dat";
|
||||
try (InputStream stream = new FileInputStream(path)) {
|
||||
handleFile(stream, path);
|
||||
}
|
||||
}
|
||||
|
||||
private String getEncoding(HMEFMessage tnefDat) {
|
||||
TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
|
||||
MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);
|
||||
int codePage = 1252;
|
||||
if (oemCP != null) {
|
||||
codePage = LittleEndian.getInt(oemCP.getData());
|
||||
} else if (cpId != null) {
|
||||
codePage = LittleEndian.getInt(cpId.getData());
|
||||
}
|
||||
switch (codePage) {
|
||||
// see http://en.wikipedia.org/wiki/Code_page for more
|
||||
case 1252: return "Windows-1252";
|
||||
case 20127: return "US-ASCII";
|
||||
default: return "cp"+codePage;
|
||||
}
|
||||
}
|
||||
private String getEncoding(HMEFMessage tnefDat) {
|
||||
TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
|
||||
MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);
|
||||
int codePage = 1252;
|
||||
if (oemCP != null) {
|
||||
codePage = LittleEndian.getInt(oemCP.getData());
|
||||
} else if (cpId != null) {
|
||||
codePage = LittleEndian.getInt(cpId.getData());
|
||||
}
|
||||
switch (codePage) {
|
||||
// see http://en.wikipedia.org/wiki/Code_page for more
|
||||
case 1252: return "Windows-1252";
|
||||
case 20127: return "US-ASCII";
|
||||
default: return "cp"+codePage;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -28,40 +28,40 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
|||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class HPBFFileHandler extends POIFSFileHandler {
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream));
|
||||
assertNotNull(pub.getEscherDelayStm());
|
||||
assertNotNull(pub.getMainContents());
|
||||
assertNotNull(pub.getQuillContents());
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream));
|
||||
assertNotNull(pub.getEscherDelayStm());
|
||||
assertNotNull(pub.getMainContents());
|
||||
assertNotNull(pub.getQuillContents());
|
||||
|
||||
// writing is not yet implemented... handlePOIDocument(pub);
|
||||
pub.close();
|
||||
}
|
||||
// writing is not yet implemented... handlePOIDocument(pub);
|
||||
pub.close();
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Override
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Override
|
||||
@Test
|
||||
void test() throws Exception {
|
||||
void test() throws Exception {
|
||||
File file = new File("test-data/publisher/SampleBrochure.pub");
|
||||
|
||||
InputStream stream = new FileInputStream(file);
|
||||
try {
|
||||
handleFile(stream, file.getPath());
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
try {
|
||||
handleFile(stream, file.getPath());
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
|
||||
handleExtracting(file);
|
||||
handleExtracting(file);
|
||||
|
||||
stream = new FileInputStream(file);
|
||||
try {
|
||||
try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) {
|
||||
assertNotNull(extractor.getText());
|
||||
}
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
stream = new FileInputStream(file);
|
||||
try {
|
||||
try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) {
|
||||
assertNotNull(extractor.getText());
|
||||
}
|
||||
} finally {
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -66,30 +66,30 @@ class HPSFFileHandler extends POIFSFileHandler {
|
|||
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
POIFSFileSystem poifs = new POIFSFileSystem(stream);
|
||||
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs);
|
||||
DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation();
|
||||
SummaryInformation si = hpsf.getSummaryInformation();
|
||||
boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
|
||||
boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME);
|
||||
POIFSFileSystem poifs = new POIFSFileSystem(stream);
|
||||
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs);
|
||||
DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation();
|
||||
SummaryInformation si = hpsf.getSummaryInformation();
|
||||
boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
|
||||
boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME);
|
||||
|
||||
assertEquals(hasDSI, dsi != null);
|
||||
assertEquals(hasDSI, dsi != null);
|
||||
assertEquals(hasSI, si != null);
|
||||
|
||||
handlePOIDocument(hpsf);
|
||||
}
|
||||
handlePOIDocument(hpsf);
|
||||
}
|
||||
|
||||
private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException {
|
||||
private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException {
|
||||
DirectoryNode root = poifs.getRoot();
|
||||
if (!root.hasEntry(streamName)) {
|
||||
return false;
|
||||
}
|
||||
if (!root.hasEntry(streamName)) {
|
||||
return false;
|
||||
}
|
||||
try (DocumentInputStream dis = root.createDocumentInputStream(streamName)) {
|
||||
return PropertySet.isPropertySetStream(dis);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static File getTempFile() {
|
||||
private static File getTempFile() {
|
||||
File f = null;
|
||||
try {
|
||||
f = TempFile.createTempFile("hpsfCopy", "out");
|
||||
|
@ -112,16 +112,16 @@ class HPSFFileHandler extends POIFSFileHandler {
|
|||
}
|
||||
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Override
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Override
|
||||
@Test
|
||||
@SuppressWarnings("java:S2699")
|
||||
void test() throws Exception {
|
||||
String path = "test-data/diagram/44501.vsd";
|
||||
void test() throws Exception {
|
||||
String path = "test-data/diagram/44501.vsd";
|
||||
try (InputStream stream = new FileInputStream(path)) {
|
||||
handleFile(stream, path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Test
|
||||
|
|
|
@ -28,61 +28,61 @@ import org.apache.poi.hsmf.datatypes.DirectoryChunk;
|
|||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class HSMFFileHandler extends POIFSFileHandler {
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
MAPIMessage mapi = new MAPIMessage(stream);
|
||||
assertNotNull(mapi.getAttachmentFiles());
|
||||
assertNotNull(mapi.getDisplayBCC());
|
||||
assertNotNull(mapi.getMessageDate());
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
MAPIMessage mapi = new MAPIMessage(stream);
|
||||
assertNotNull(mapi.getAttachmentFiles());
|
||||
assertNotNull(mapi.getDisplayBCC());
|
||||
assertNotNull(mapi.getMessageDate());
|
||||
|
||||
AttachmentChunks[] attachments = mapi.getAttachmentFiles();
|
||||
AttachmentChunks[] attachments = mapi.getAttachmentFiles();
|
||||
|
||||
for(AttachmentChunks attachment : attachments) {
|
||||
for(AttachmentChunks attachment : attachments) {
|
||||
|
||||
DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory();
|
||||
if(chunkDirectory != null) {
|
||||
MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage();
|
||||
assertNotNull(attachmentMSG);
|
||||
String body = attachmentMSG.getTextBody();
|
||||
assertNotNull(body);
|
||||
}
|
||||
}
|
||||
DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory();
|
||||
if(chunkDirectory != null) {
|
||||
MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage();
|
||||
assertNotNull(attachmentMSG);
|
||||
String body = attachmentMSG.getTextBody();
|
||||
assertNotNull(body);
|
||||
}
|
||||
}
|
||||
|
||||
/* => Writing isn't yet supported...
|
||||
// write out the file
|
||||
File file = TempFile.createTempFile("StressTest", ".msg");
|
||||
writeToFile(mapi, file);
|
||||
/* => Writing isn't yet supported...
|
||||
// write out the file
|
||||
File file = TempFile.createTempFile("StressTest", ".msg");
|
||||
writeToFile(mapi, file);
|
||||
|
||||
MAPIMessage read = new MAPIMessage(file.getAbsolutePath());
|
||||
assertNotNull(read.getAttachmentFiles());
|
||||
assertNotNull(read.getDisplayBCC());
|
||||
assertNotNull(read.getMessageDate());
|
||||
*/
|
||||
MAPIMessage read = new MAPIMessage(file.getAbsolutePath());
|
||||
assertNotNull(read.getAttachmentFiles());
|
||||
assertNotNull(read.getDisplayBCC());
|
||||
assertNotNull(read.getMessageDate());
|
||||
*/
|
||||
|
||||
// writing is not yet supported... handlePOIDocument(mapi);
|
||||
// writing is not yet supported... handlePOIDocument(mapi);
|
||||
|
||||
mapi.close();
|
||||
}
|
||||
mapi.close();
|
||||
}
|
||||
|
||||
// private void writeToFile(MAPIMessage mapi, File file)
|
||||
// throws FileNotFoundException, IOException {
|
||||
// OutputStream stream = new FileOutputStream(file);
|
||||
// try {
|
||||
// mapi.write(stream);
|
||||
// } finally {
|
||||
// stream.close();
|
||||
// }
|
||||
// }
|
||||
// private void writeToFile(MAPIMessage mapi, File file)
|
||||
// throws FileNotFoundException, IOException {
|
||||
// OutputStream stream = new FileOutputStream(file);
|
||||
// try {
|
||||
// mapi.write(stream);
|
||||
// } finally {
|
||||
// stream.close();
|
||||
// }
|
||||
// }
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Override
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Override
|
||||
@Test
|
||||
void test() throws Exception {
|
||||
void test() throws Exception {
|
||||
File file = new File("test-data/hsmf/logsat.com_signatures_valid.msg");
|
||||
try (InputStream stream = new FileInputStream(file)) {
|
||||
handleFile(stream, file.getPath());
|
||||
}
|
||||
try (InputStream stream = new FileInputStream(file)) {
|
||||
handleFile(stream, file.getPath());
|
||||
}
|
||||
|
||||
handleExtracting(file);
|
||||
}
|
||||
handleExtracting(file);
|
||||
}
|
||||
}
|
|
@ -37,90 +37,90 @@ import org.apache.commons.io.output.NullPrintStream;
|
|||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class HSSFFileHandler extends SpreadsheetHandler {
|
||||
private final POIFSFileHandler delegate = new POIFSFileHandler();
|
||||
@Override
|
||||
private final POIFSFileHandler delegate = new POIFSFileHandler();
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
HSSFWorkbook wb = new HSSFWorkbook(stream);
|
||||
handleWorkbook(wb);
|
||||
HSSFWorkbook wb = new HSSFWorkbook(stream);
|
||||
handleWorkbook(wb);
|
||||
|
||||
// TODO: some documents fail currently...
|
||||
// TODO: some documents fail currently...
|
||||
// Note - as of Bugzilla 48036 (svn r828244, r828247) POI is capable of evaluating
|
||||
// IntersectionPtg. However it is still not capable of parsing it.
|
||||
// So FormulaEvalTestData.xls now contains a few formulas that produce errors here.
|
||||
//HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb);
|
||||
//evaluator.evaluateAll();
|
||||
|
||||
delegate.handlePOIDocument(wb);
|
||||
delegate.handlePOIDocument(wb);
|
||||
|
||||
// also try to see if some of the Records behave incorrectly
|
||||
// TODO: still fails on some records... RecordsStresser.handleWorkbook(wb);
|
||||
// also try to see if some of the Records behave incorrectly
|
||||
// TODO: still fails on some records... RecordsStresser.handleWorkbook(wb);
|
||||
|
||||
HSSFOptimiser.optimiseCellStyles(wb);
|
||||
for(Sheet sheet : wb) {
|
||||
for (Row row : sheet) {
|
||||
for (Cell cell : row) {
|
||||
assertNotNull(cell.getCellStyle());
|
||||
}
|
||||
}
|
||||
}
|
||||
HSSFOptimiser.optimiseCellStyles(wb);
|
||||
for(Sheet sheet : wb) {
|
||||
for (Row row : sheet) {
|
||||
for (Cell cell : row) {
|
||||
assertNotNull(cell.getCellStyle());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HSSFOptimiser.optimiseFonts(wb);
|
||||
}
|
||||
HSSFOptimiser.optimiseFonts(wb);
|
||||
}
|
||||
|
||||
private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>();
|
||||
static {
|
||||
// encrypted
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/35897-type4.xls");
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/xor-encryption-abc.xls");
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/password.xls");
|
||||
// broken files
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/43493.xls");
|
||||
// TODO: ok to ignore?
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/50833.xls");
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/51832.xls");
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/XRefCalc.xls");
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/61300.xls");
|
||||
}
|
||||
private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>();
|
||||
static {
|
||||
// encrypted
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/35897-type4.xls");
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/xor-encryption-abc.xls");
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/password.xls");
|
||||
// broken files
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/43493.xls");
|
||||
// TODO: ok to ignore?
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/50833.xls");
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/51832.xls");
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/XRefCalc.xls");
|
||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/61300.xls");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleAdditional(File file) throws Exception {
|
||||
// redirect stdout as the examples often write lots of text
|
||||
PrintStream oldOut = System.out;
|
||||
String fileWithParent = file.getParentFile().getName() + "/" + file.getName();
|
||||
try {
|
||||
System.setOut(new NullPrintStream());
|
||||
@Override
|
||||
public void handleAdditional(File file) throws Exception {
|
||||
// redirect stdout as the examples often write lots of text
|
||||
PrintStream oldOut = System.out;
|
||||
String fileWithParent = file.getParentFile().getName() + "/" + file.getName();
|
||||
try {
|
||||
System.setOut(new NullPrintStream());
|
||||
|
||||
BiffViewer.main(new String[]{file.getAbsolutePath()});
|
||||
BiffViewer.main(new String[]{file.getAbsolutePath()});
|
||||
|
||||
assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" );
|
||||
} catch (OldExcelFormatException e) {
|
||||
// old excel formats are not supported here
|
||||
} catch (RuntimeException e) {
|
||||
if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) {
|
||||
throw e;
|
||||
}
|
||||
} finally {
|
||||
System.setOut(oldOut);
|
||||
}
|
||||
}
|
||||
assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" );
|
||||
} catch (OldExcelFormatException e) {
|
||||
// old excel formats are not supported here
|
||||
} catch (RuntimeException e) {
|
||||
if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) {
|
||||
throw e;
|
||||
}
|
||||
} finally {
|
||||
System.setOut(oldOut);
|
||||
}
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Test
|
||||
void test() throws Exception {
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Test
|
||||
void test() throws Exception {
|
||||
File file = new File("../test-data/spreadsheet/59074.xls");
|
||||
|
||||
try (InputStream stream = new FileInputStream(file)) {
|
||||
handleFile(stream, file.getPath());
|
||||
}
|
||||
try (InputStream stream = new FileInputStream(file)) {
|
||||
handleFile(stream, file.getPath());
|
||||
}
|
||||
|
||||
handleExtracting(file);
|
||||
handleExtracting(file);
|
||||
|
||||
handleAdditional(file);
|
||||
}
|
||||
handleAdditional(file);
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Test
|
||||
@SuppressWarnings("java:S2699")
|
||||
@SuppressWarnings("java:S2699")
|
||||
void testExtractor() throws Exception {
|
||||
handleExtracting(new File("../test-data/spreadsheet/59074.xls"));
|
||||
}
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.poi.xwpf.usermodel.XWPFRelation;
|
|||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class OPCFileHandler extends AbstractFileHandler {
|
||||
@Override
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
// ignore password protected files
|
||||
if (POIXMLDocumentHandler.isEncrypted(stream)) return;
|
||||
|
@ -59,15 +59,15 @@ class OPCFileHandler extends AbstractFileHandler {
|
|||
// text-extraction is not possible currently for these types of files
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Test
|
||||
void test() throws Exception {
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Test
|
||||
void test() throws Exception {
|
||||
File file = new File("test-data/diagram/test.vsdx");
|
||||
|
||||
try (InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000)) {
|
||||
handleFile(stream, file.getPath());
|
||||
}
|
||||
|
||||
handleExtracting(file);
|
||||
}
|
||||
handleExtracting(file);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -31,15 +31,15 @@ import org.junit.jupiter.api.Test;
|
|||
|
||||
class POIFSFileHandler extends AbstractFileHandler {
|
||||
|
||||
@Override
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
try (POIFSFileSystem fs = new POIFSFileSystem(stream)) {
|
||||
handlePOIFSFileSystem(fs);
|
||||
handleHPSFProperties(fs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void handleHPSFProperties(POIFSFileSystem fs) throws IOException {
|
||||
private void handleHPSFProperties(POIFSFileSystem fs) throws IOException {
|
||||
try (HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs)) {
|
||||
// can be null
|
||||
ext.getDocSummaryInformation();
|
||||
|
@ -52,12 +52,12 @@ class POIFSFileHandler extends AbstractFileHandler {
|
|||
}
|
||||
|
||||
private void handlePOIFSFileSystem(POIFSFileSystem fs) {
|
||||
assertNotNull(fs);
|
||||
assertNotNull(fs.getRoot());
|
||||
}
|
||||
assertNotNull(fs);
|
||||
assertNotNull(fs.getRoot());
|
||||
}
|
||||
|
||||
protected void handlePOIDocument(POIDocument doc) throws Exception {
|
||||
try (UnsynchronizedByteArrayOutputStream out = new UnsynchronizedByteArrayOutputStream()) {
|
||||
protected void handlePOIDocument(POIDocument doc) throws Exception {
|
||||
try (UnsynchronizedByteArrayOutputStream out = new UnsynchronizedByteArrayOutputStream()) {
|
||||
doc.write(out);
|
||||
|
||||
try (InputStream in = out.toInputStream();
|
||||
|
@ -65,7 +65,7 @@ class POIFSFileHandler extends AbstractFileHandler {
|
|||
handlePOIFSFileSystem(fs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Test
|
||||
|
|
|
@ -29,13 +29,13 @@ import org.apache.xmlbeans.XmlCursor;
|
|||
import org.apache.xmlbeans.XmlObject;
|
||||
|
||||
public final class POIXMLDocumentHandler {
|
||||
protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception {
|
||||
assertNotNull(doc.getAllEmbeddedParts());
|
||||
assertNotNull(doc.getPackage());
|
||||
assertNotNull(doc.getPackagePart());
|
||||
assertNotNull(doc.getProperties());
|
||||
assertNotNull(doc.getRelations());
|
||||
}
|
||||
protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception {
|
||||
assertNotNull(doc.getAllEmbeddedParts());
|
||||
assertNotNull(doc.getPackage());
|
||||
assertNotNull(doc.getPackagePart());
|
||||
assertNotNull(doc.getProperties());
|
||||
assertNotNull(doc.getRelations());
|
||||
}
|
||||
|
||||
protected static boolean isEncrypted(InputStream stream) throws IOException {
|
||||
if (FileMagic.valueOf(stream) == FileMagic.OLE2) {
|
||||
|
|
|
@ -33,68 +33,68 @@ import org.apache.poi.util.RecordFormatException;
|
|||
import org.apache.poi.xssf.usermodel.XSSFChartSheet;
|
||||
|
||||
public abstract class SpreadsheetHandler extends AbstractFileHandler {
|
||||
public void handleWorkbook(Workbook wb) throws IOException {
|
||||
// try to access some of the content
|
||||
readContent(wb);
|
||||
public void handleWorkbook(Workbook wb) throws IOException {
|
||||
// try to access some of the content
|
||||
readContent(wb);
|
||||
|
||||
// write out the file
|
||||
writeToArray(wb);
|
||||
// write out the file
|
||||
writeToArray(wb);
|
||||
|
||||
// access some more content (we had cases where writing corrupts the data in memory)
|
||||
readContent(wb);
|
||||
// access some more content (we had cases where writing corrupts the data in memory)
|
||||
readContent(wb);
|
||||
|
||||
// write once more
|
||||
UnsynchronizedByteArrayOutputStream out = writeToArray(wb);
|
||||
// write once more
|
||||
UnsynchronizedByteArrayOutputStream out = writeToArray(wb);
|
||||
|
||||
// read in the written file
|
||||
Workbook read = WorkbookFactory.create(out.toInputStream());
|
||||
// read in the written file
|
||||
Workbook read = WorkbookFactory.create(out.toInputStream());
|
||||
|
||||
assertNotNull(read);
|
||||
assertNotNull(read);
|
||||
|
||||
readContent(read);
|
||||
readContent(read);
|
||||
|
||||
extractEmbedded(read);
|
||||
extractEmbedded(read);
|
||||
|
||||
modifyContent(read);
|
||||
modifyContent(read);
|
||||
|
||||
read.close();
|
||||
}
|
||||
read.close();
|
||||
}
|
||||
|
||||
private UnsynchronizedByteArrayOutputStream writeToArray(Workbook wb) throws IOException {
|
||||
UnsynchronizedByteArrayOutputStream stream = new UnsynchronizedByteArrayOutputStream();
|
||||
wb.write(stream);
|
||||
return stream;
|
||||
}
|
||||
private UnsynchronizedByteArrayOutputStream writeToArray(Workbook wb) throws IOException {
|
||||
UnsynchronizedByteArrayOutputStream stream = new UnsynchronizedByteArrayOutputStream();
|
||||
wb.write(stream);
|
||||
return stream;
|
||||
}
|
||||
|
||||
private void readContent(Workbook wb) {
|
||||
for(int i = 0;i < wb.getNumberOfSheets();i++) {
|
||||
Sheet sheet = wb.getSheetAt(i);
|
||||
assertNotNull(wb.getSheet(sheet.getSheetName()));
|
||||
sheet.groupColumn((short) 4, (short) 5);
|
||||
sheet.setColumnGroupCollapsed(4, true);
|
||||
sheet.setColumnGroupCollapsed(4, false);
|
||||
private void readContent(Workbook wb) {
|
||||
for(int i = 0;i < wb.getNumberOfSheets();i++) {
|
||||
Sheet sheet = wb.getSheetAt(i);
|
||||
assertNotNull(wb.getSheet(sheet.getSheetName()));
|
||||
sheet.groupColumn((short) 4, (short) 5);
|
||||
sheet.setColumnGroupCollapsed(4, true);
|
||||
sheet.setColumnGroupCollapsed(4, false);
|
||||
|
||||
// don't do this for very large sheets as it will take a long time
|
||||
if(sheet.getPhysicalNumberOfRows() > 1000) {
|
||||
continue;
|
||||
}
|
||||
// don't do this for very large sheets as it will take a long time
|
||||
if(sheet.getPhysicalNumberOfRows() > 1000) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for(Row row : sheet) {
|
||||
for(Cell cell : row) {
|
||||
assertNotNull(cell.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
for(Row row : sheet) {
|
||||
for(Cell cell : row) {
|
||||
assertNotNull(cell.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (Name name : wb.getAllNames()) {
|
||||
// this sometimes caused exceptions
|
||||
for (Name name : wb.getAllNames()) {
|
||||
// this sometimes caused exceptions
|
||||
if(!name.isFunctionName()) {
|
||||
name.getRefersToFormula();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void extractEmbedded(Workbook wb) throws IOException {
|
||||
private void extractEmbedded(Workbook wb) throws IOException {
|
||||
EmbeddedExtractor ee = new EmbeddedExtractor();
|
||||
|
||||
for (Sheet s : wb) {
|
||||
|
@ -104,48 +104,48 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler {
|
|||
assertNotNull(ed.getShape());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void modifyContent(Workbook wb) {
|
||||
/* a number of file fail because of various things: udf, unimplemented functions, ...
|
||||
we would need quite a list of excludes and the large regression tests would probably
|
||||
take a lot longer to run...
|
||||
try {
|
||||
// try to re-compute all formulas to find cases where parsing fails
|
||||
wb.getCreationHelper().createFormulaEvaluator().evaluateAll();
|
||||
} catch (RuntimeException e) {
|
||||
// only allow a specific exception which indicates that an external
|
||||
// reference was not found
|
||||
if(!e.getMessage().contains("Could not resolve external workbook name")) {
|
||||
throw e;
|
||||
}
|
||||
private void modifyContent(Workbook wb) {
|
||||
/* a number of file fail because of various things: udf, unimplemented functions, ...
|
||||
we would need quite a list of excludes and the large regression tests would probably
|
||||
take a lot longer to run...
|
||||
try {
|
||||
// try to re-compute all formulas to find cases where parsing fails
|
||||
wb.getCreationHelper().createFormulaEvaluator().evaluateAll();
|
||||
} catch (RuntimeException e) {
|
||||
// only allow a specific exception which indicates that an external
|
||||
// reference was not found
|
||||
if(!e.getMessage().contains("Could not resolve external workbook name")) {
|
||||
throw e;
|
||||
}
|
||||
|
||||
}*/
|
||||
}*/
|
||||
|
||||
for (int i=wb.getNumberOfSheets()-1; i>=0; i--) {
|
||||
if(wb.getSheetAt(i) instanceof XSSFChartSheet) {
|
||||
// clone for chart-sheets is not supported
|
||||
continue;
|
||||
}
|
||||
for (int i=wb.getNumberOfSheets()-1; i>=0; i--) {
|
||||
if(wb.getSheetAt(i) instanceof XSSFChartSheet) {
|
||||
// clone for chart-sheets is not supported
|
||||
continue;
|
||||
}
|
||||
|
||||
try {
|
||||
wb.cloneSheet(i);
|
||||
} catch (RecordFormatException e) {
|
||||
if (e.getCause() instanceof CloneNotSupportedException) {
|
||||
// ignore me
|
||||
continue;
|
||||
}
|
||||
throw e;
|
||||
} catch (RuntimeException e) {
|
||||
if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) ||
|
||||
"CountryRecord not found".equals(e.getMessage()) ||
|
||||
"CountryRecord or SSTRecord not found".equals(e.getMessage()) ||
|
||||
"Cannot add more than 65535 shapes".equals(e.getMessage()) ) {
|
||||
// ignore these here for now
|
||||
continue;
|
||||
try {
|
||||
wb.cloneSheet(i);
|
||||
} catch (RecordFormatException e) {
|
||||
if (e.getCause() instanceof CloneNotSupportedException) {
|
||||
// ignore me
|
||||
continue;
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
throw e;
|
||||
} catch (RuntimeException e) {
|
||||
if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) ||
|
||||
"CountryRecord not found".equals(e.getMessage()) ||
|
||||
"CountryRecord or SSTRecord not found".equals(e.getMessage()) ||
|
||||
"Cannot add more than 65535 shapes".equals(e.getMessage()) ) {
|
||||
// ignore these here for now
|
||||
continue;
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -31,55 +31,55 @@ import org.apache.poi.xslf.usermodel.XSLFSlideShow;
|
|||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class XSLFFileHandler extends SlideShowHandler {
|
||||
@Override
|
||||
@Override
|
||||
public void handleFile(InputStream stream, String path) throws Exception {
|
||||
try (XMLSlideShow slide = new XMLSlideShow(stream);
|
||||
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) {
|
||||
;
|
||||
assertNotNull(slideInner.getPresentation());
|
||||
assertNotNull(slideInner.getSlideMasterReferences());
|
||||
assertNotNull(slideInner.getSlideReferences());
|
||||
try (XMLSlideShow slide = new XMLSlideShow(stream);
|
||||
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) {
|
||||
;
|
||||
assertNotNull(slideInner.getPresentation());
|
||||
assertNotNull(slideInner.getSlideMasterReferences());
|
||||
assertNotNull(slideInner.getSlideReferences());
|
||||
|
||||
new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
|
||||
new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
|
||||
|
||||
handleSlideShow(slide);
|
||||
} catch (POIXMLException e) {
|
||||
Exception cause = (Exception)e.getCause();
|
||||
throw cause == null ? e : cause;
|
||||
}
|
||||
}
|
||||
handleSlideShow(slide);
|
||||
} catch (POIXMLException e) {
|
||||
Exception cause = (Exception)e.getCause();
|
||||
throw cause == null ? e : cause;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@Override
|
||||
public void handleExtracting(File file) throws Exception {
|
||||
super.handleExtracting(file);
|
||||
|
||||
|
||||
// additionally try the other getText() methods
|
||||
try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) {
|
||||
assertNotNull(extractor);
|
||||
extractor.setSlidesByDefault(true);
|
||||
extractor.setNotesByDefault(true);
|
||||
extractor.setMasterByDefault(true);
|
||||
try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) {
|
||||
assertNotNull(extractor);
|
||||
extractor.setSlidesByDefault(true);
|
||||
extractor.setNotesByDefault(true);
|
||||
extractor.setMasterByDefault(true);
|
||||
|
||||
assertNotNull(extractor.getText());
|
||||
assertNotNull(extractor.getText());
|
||||
|
||||
extractor.setSlidesByDefault(false);
|
||||
extractor.setNotesByDefault(false);
|
||||
extractor.setMasterByDefault(false);
|
||||
extractor.setSlidesByDefault(false);
|
||||
extractor.setNotesByDefault(false);
|
||||
extractor.setMasterByDefault(false);
|
||||
|
||||
assertEquals("", extractor.getText(), "With all options disabled we should not get text");
|
||||
}
|
||||
assertEquals("", extractor.getText(), "With all options disabled we should not get text");
|
||||
}
|
||||
}
|
||||
|
||||
// a test-case to test this locally without executing the full TestAllFiles
|
||||
@Override
|
||||
@Override
|
||||
@Test
|
||||
void test() throws Exception {
|
||||
void test() throws Exception {
|
||||
File file = new File("test-data/slideshow/ca.ubc.cs.people_~emhill_presentations_HowWeRefactor.pptx");
|
||||
try (InputStream stream = new FileInputStream(file)) {
|
||||
handleFile(stream, file.getPath());
|
||||
}
|
||||
try (InputStream stream = new FileInputStream(file)) {
|
||||
handleFile(stream, file.getPath());
|
||||
}
|
||||
|
||||
handleExtracting(file);
|
||||
}
|
||||
handleExtracting(file);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue