Add test for FileMagic

JavaDoc warning fixes
Remove some IDE warnings
Reformat code of sample application

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1847437 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2018-11-25 21:56:43 +00:00
parent 061db56f2d
commit d28720afd0
15 changed files with 170 additions and 94 deletions

View File

@ -35,7 +35,6 @@ import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import org.apache.poi.EncryptedDocumentException;
@ -148,7 +147,7 @@ public class XSSFFileHandler extends SpreadsheetHandler {
}
private void exportToXML(XSSFWorkbook wb) throws SAXException,
ParserConfigurationException, TransformerException {
TransformerException {
for (XSSFMap map : wb.getCustomXMLMappings()) {
XSSFExportToXml exporter = new XSSFExportToXml(map);
@ -165,7 +164,6 @@ public class XSSFFileHandler extends SpreadsheetHandler {
// zip-bomb
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764.xlsx");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764-2.xlsx");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764.xlsx");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb.xlsx");
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb-empty.xlsx");
// strict OOXML
@ -185,18 +183,19 @@ public class XSSFFileHandler extends SpreadsheetHandler {
public void handleAdditional(File file) throws Exception {
// redirect stdout as the examples often write lots of text
PrintStream oldOut = System.out;
String testFile = file.getParentFile().getName() + "/" + file.getName();
try {
System.setOut(new NullPrintStream());
FromHowTo.main(new String[]{file.getAbsolutePath()});
XLSX2CSV.main(new String[]{file.getAbsolutePath()});
assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
EXPECTED_ADDITIONAL_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName()));
EXPECTED_ADDITIONAL_FAILURES.contains(testFile));
} catch (OLE2NotOfficeXmlFileException e) {
// we have some files that are not actually OOXML and thus cannot be tested here
} catch (IllegalArgumentException | InvalidFormatException | POIXMLException | IOException e) {
if(!EXPECTED_ADDITIONAL_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())) {
if(!EXPECTED_ADDITIONAL_FAILURES.contains(testFile)) {
throw e;
}
} finally {

View File

@ -36,7 +36,7 @@ import org.apache.poi.util.POILogger;
* Supports reading and writing of variant data.<p>
*
* <strong>FIXME (3):</strong> Reading and writing should be made more
* uniform than it is now. The following items should be resolved:<p>
* uniform than it is now. The following items should be resolved:
*
* <ul>
*

View File

@ -33,8 +33,11 @@ import org.apache.poi.hpsf.SummaryInformation;
* The methods {@link #getSummaryInformationProperties} and {@link
* #getDocumentSummaryInformationProperties} return singleton {@link
* PropertyIDMap}s. An application that wants to extend these maps
* should treat them as unmodifiable, copy them and modifiy the
* should treat them as unmodifiable, copy them and modify the
* copies.
*
* Trying to modify the map directly will cause an
* {@link UnsupportedOperationException} to be thrown.
*/
public class PropertyIDMap implements Map<Long,String> {
@ -490,11 +493,13 @@ public class PropertyIDMap implements Map<Long,String> {
@Override
public String put(Long key, String value) {
//noinspection ConstantConditions
return idMap.put(key, value);
}
@Override
public String remove(Object key) {
//noinspection ConstantConditions
return idMap.remove(key);
}

View File

@ -2276,6 +2276,8 @@ public final class InternalWorkbook {
/**
* Only for internal calls - code based on this is not supported ...
*
* @return The list of records.
*/
@Internal
public WorkbookRecordList getWorkbookRecordList() {

View File

@ -33,8 +33,9 @@ import org.apache.poi.util.LittleEndianInputStream;
import org.apache.poi.util.RecordFormatException;
/**
* Title: Record Input Stream<P>
* Description: Wraps a stream and provides helper methods for the construction of records.<P>
* Title: Record Input Stream
*
* Description: Wraps a stream and provides helper methods for the construction of records.
*/
public final class RecordInputStream implements LittleEndianInput {
@ -194,11 +195,11 @@ public final class RecordInputStream implements LittleEndianInput {
private int readNextSid() {
int nAvailable = _bhi.available();
if (nAvailable < EOFRecord.ENCODED_SIZE) {
if (nAvailable > 0) {
/*if (nAvailable > 0) {
// some scrap left over?
// ex45582-22397.xls has one extra byte after the last record
// Excel reads that file OK
}
}*/
return INVALID_SID_VALUE;
}
int result = _bhi.readRecordSID();
@ -305,14 +306,13 @@ public final class RecordInputStream implements LittleEndianInput {
@Override
public double readDouble() {
long valueLongBits = readLong();
double result = Double.longBitsToDouble(valueLongBits);
if (Double.isNaN(result)) {
/*if (Double.isNaN(result)) {
// YK: Excel doesn't write NaN but instead converts the cell type into {@link CellType#ERROR}.
// HSSF prior to version 3.7 had a bug: it could write Double.NaN but could not read such a file back.
// This behavior was fixed in POI-3.7.
//throw new RuntimeException("Did not expect to read NaN"); // (Because Excel typically doesn't write NaN)
}
return result;
}*/
return Double.longBitsToDouble(valueLongBits);
}
public void readPlain(byte[] buf, int off, int len) {

View File

@ -161,7 +161,7 @@ public final class SSTRecord extends ContinuableRecord {
* <P>
* The data consists of sets of string data. This string data is
* arranged as follows:
* <P>
* </P><P>
* <pre>
* short string_length; // length of string data
* byte string_flag; // flag specifying special string
@ -176,9 +176,9 @@ public final class SSTRecord extends ContinuableRecord {
* byte[] extension; // optional extension (length of array
* // is extend_length)
* </pre>
* <P>
* </P><P>
* The string_flag is bit mapped as follows:
* <P>
* </P><P>
* <TABLE summary="string_flag mapping">
* <TR>
* <TH>Bit number</TH>
@ -232,7 +232,7 @@ public final class SSTRecord extends ContinuableRecord {
* associated data. The UnicodeString class can handle the byte[]
* vs short[] nature of the actual string data
*
* @param in the RecordInputstream to read the record from
* @param in the RecordInputStream to read the record from
*/
public SSTRecord(RecordInputStream in) {
// this method is ALWAYS called after construction -- using

View File

@ -77,7 +77,7 @@ public final class SharedFormulaRecord extends SharedValueRecordBase {
public String toString()
{
StringBuffer buffer = new StringBuffer();
StringBuilder buffer = new StringBuilder();
buffer.append("[SHARED FORMULA (").append(HexDump.intToHex(sid)).append("]\n");
buffer.append(" .range = ").append(getRange()).append("\n");
@ -99,6 +99,10 @@ public final class SharedFormulaRecord extends SharedValueRecordBase {
}
/**
* Convert formula into an array of {@link Ptg} tokens.
*
* @param formula The record to break into tokens, cannot be null
*
* @return the equivalent {@link Ptg} array that the formula would have, were it not shared.
*/
public Ptg[] getFormulaTokens(FormulaRecord formula) {

View File

@ -42,6 +42,8 @@ public abstract class SharedValueRecordBase extends StandardRecord {
/**
* reads only the range (1 {@link CellRangeAddress8Bit}) from the stream
*
* @param in The interface for reading the record data.
*/
public SharedValueRecordBase(LittleEndianInput in) {
_range = new CellRangeAddress8Bit(in);
@ -99,14 +101,12 @@ public abstract class SharedValueRecordBase extends StandardRecord {
&& r.getLastColumn() >= colIx;
}
/**
* @return {@code true} if (rowIx, colIx) describes the first cell in this shared value
* object's range
*
* @param rowIx the row index
* @param colIx the column index
*
* @return {@code true} if its the first cell in this shared value object range
*
*
* @return {@code true} if (rowIx, colIx) describes the first cell in this shared value
* object's range
*
* @see #getRange()
*/
public final boolean isFirstCell(int rowIx, int colIx) {

View File

@ -45,8 +45,8 @@ public class RecordFormatException
* be thrown. If assertTrue is <code>false</code>, this will throw this
* exception with the message.
*
* @param assertTrue
* @param message
* @param assertTrue If false, the exception is thrown; if true, no action is performed
* @param message The message to include in the thrown exception
*/
public static void check(boolean assertTrue, String message) {
if (! assertTrue) {

View File

@ -22,41 +22,37 @@ import org.apache.poi.extractor.POITextExtractor;
/**
* A command line wrapper around {@link ExtractorFactory}, useful
* for when debugging.
* for when debugging.
*/
public class CommandLineTextExtractor {
public static final String DIVIDER = "=======================";
public static void main(String[] args) throws Exception {
if(args.length < 1) {
System.err.println("Use:");
System.err.println(" CommandLineTextExtractor <filename> [filename] [filename]");
System.exit(1);
}
public static final String DIVIDER = "=======================";
for (String arg : args) {
System.out.println(DIVIDER);
public static void main(String[] args) throws Exception {
if (args.length < 1) {
System.err.println("Use:");
System.err.println(" CommandLineTextExtractor <filename> [filename] [filename]");
System.exit(1);
}
File f = new File(arg);
System.out.println(f);
for (String arg : args) {
System.out.println(DIVIDER);
POITextExtractor extractor =
ExtractorFactory.createExtractor(f);
try {
POITextExtractor metadataExtractor =
extractor.getMetadataTextExtractor();
File f = new File(arg);
System.out.println(f);
System.out.println(" " + DIVIDER);
String metaData = metadataExtractor.getText();
System.out.println(metaData);
System.out.println(" " + DIVIDER);
String text = extractor.getText();
System.out.println(text);
System.out.println(DIVIDER);
System.out.println("Had " + metaData.length() + " characters of metadata and " + text.length() + " characters of text");
} finally {
extractor.close();
}
}
}
try (POITextExtractor extractor = ExtractorFactory.createExtractor(f)) {
POITextExtractor metadataExtractor =
extractor.getMetadataTextExtractor();
System.out.println(" " + DIVIDER);
String metaData = metadataExtractor.getText();
System.out.println(metaData);
System.out.println(" " + DIVIDER);
String text = extractor.getText();
System.out.println(text);
System.out.println(DIVIDER);
System.out.println("Had " + metaData.length() + " characters of metadata and " + text.length() + " characters of text");
}
}
}
}

View File

@ -154,8 +154,6 @@ public final class ZipHelper {
"The supplied data appears to be a raw XML file. " +
"Formats such as Office 2003 XML are not supported");
default:
case OOXML:
case UNKNOWN:
// Don't check for a Zip header, as to maintain backwards
// compatibility we need to let them seek over junk at the
// start before beginning processing.

View File

@ -19,6 +19,7 @@ package org.apache.poi.ooxml;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@ -89,7 +90,7 @@ public final class TestPOIXMLProperties {
XSSFWorkbook newWorkbook =
XSSFTestDataSamples.writeOutAndReadBack(workbook);
workbook.close();
assertTrue(workbook != newWorkbook);
assertNotSame(workbook, newWorkbook);
POIXMLProperties newProps = newWorkbook.getProperties();
@ -158,7 +159,7 @@ public final class TestPOIXMLProperties {
p = ctProps.getPropertyArray(3);
assertEquals("{D5CDD505-2E9C-101B-9397-08002B2CF9AE}", p.getFmtid());
assertEquals("test-4", p.getName());
assertEquals(true, p.getBool());
assertTrue(p.getBool());
assertEquals(5, p.getPid());
wb2.close();

View File

@ -42,7 +42,7 @@ public class TestXDGFVisioExtractor {
}
@After
public void closeResoures() throws IOException {
public void closeResources() throws IOException {
if(xml != null) {
xml.close();
}

View File

@ -70,7 +70,7 @@ public class XSSFTestDataSamples {
* @param wb the workbook to write
* @param testName a fragment of the filename
* @return the location where the workbook was saved
* @throws IOException
* @throws IOException If writing the file fails
*/
public static <R extends Workbook> File writeOut(R wb, String testName) throws IOException {
final File file = getOutputFile(testName);
@ -104,7 +104,9 @@ public class XSSFTestDataSamples {
file = TempFile.createTempFile(testName, ".xlsx");
}
if (file.exists()) {
file.delete();
if(!file.delete()) {
throw new IOException("Could not delete file " + file);
}
}
return file;
}
@ -114,7 +116,7 @@ public class XSSFTestDataSamples {
*
* @param wb the workbook to write
* @return the memory buffer
* @throws IOException
* @throws IOException If writing the file fails
*/
public static <R extends Workbook> ByteArrayOutputStream writeOut(R wb) throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream(8192);
@ -137,7 +139,7 @@ public class XSSFTestDataSamples {
* to avoid creating a temporary file. However, this may complicate the calling
* code to avoid having the workbook, BAOS, and BAIS open at the same time.
*
* @param wb
* @param wb The workbook to write out, it is closed after the call.
* @param testName file name to be used to write to a file. This file will be cleaned up by a call to readBack(String)
* @return workbook location
* @throws RuntimeException if {@link #TEST_OUTPUT_DIR} System property is not set
@ -161,18 +163,13 @@ public class XSSFTestDataSamples {
*
* @param wb the workbook to write
* @return the memory buffer
* @throws IOException
* @throws IOException If writing the file fails
*/
public static <R extends Workbook> ByteArrayOutputStream writeOutAndClose(R wb) {
try {
ByteArrayOutputStream out = writeOut(wb);
// Do not close the workbook if there was a problem writing the workbook
wb.close();
return out;
}
catch (final IOException e) {
throw new RuntimeException(e);
}
public static <R extends Workbook> ByteArrayOutputStream writeOutAndClose(R wb) throws IOException {
ByteArrayOutputStream out = writeOut(wb);
// Do not close the workbook if there was a problem writing the workbook
wb.close();
return out;
}
/**
@ -183,12 +180,14 @@ public class XSSFTestDataSamples {
*
* @param file the workbook file to read and delete
* @return the read back workbook
* @throws IOException
* @throws IOException If reading or deleting the file fails
*/
public static XSSFWorkbook readBackAndDelete(File file) throws IOException {
XSSFWorkbook wb = readBack(file);
// do not delete the file if there's an error--might be helpful for debugging
file.delete();
if(!file.delete()) {
throw new IOException("Could not delete file " + file + " after reading");
}
return wb;
}
@ -198,16 +197,12 @@ public class XSSFTestDataSamples {
*
* @param file the workbook file to read
* @return the read back workbook
* @throws IOException
* @throws IOException If reading the file fails
*/
public static XSSFWorkbook readBack(File file) throws IOException {
InputStream in = new FileInputStream(file);
try {
try (InputStream in = new FileInputStream(file)) {
return new XSSFWorkbook(in);
}
finally {
in.close();
}
}
/**
@ -216,17 +211,13 @@ public class XSSFTestDataSamples {
*
* @param out the output stream to read back from
* @return the read back workbook
* @throws IOException
* @throws IOException If reading the file fails
*/
public static XSSFWorkbook readBack(ByteArrayOutputStream out) throws IOException {
InputStream is = new ByteArrayInputStream(out.toByteArray());
out.close();
try {
try (InputStream is = new ByteArrayInputStream(out.toByteArray())) {
out.close();
return new XSSFWorkbook(is);
}
finally {
is.close();
}
}
/**

View File

@ -0,0 +1,80 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.filesystem;
import org.apache.poi.POIDataSamples;
import org.junit.Test;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import static org.junit.Assert.*;
/**
 * Tests for {@link FileMagic}: lookup by constant name, detection from raw
 * header bytes, detection from sample files and streams, and the
 * {@code prepareToCheckMagic} stream helper.
 */
public class TestFileMagic {
    /**
     * Checks lookup by enum constant name and detection from header bytes.
     * The header strings are encoded with an explicit charset so that the
     * bytes handed to {@code FileMagic.valueOf(byte[])} do not depend on the
     * platform default encoding.
     */
    @Test
    public void testFileMagic() {
        assertEquals(FileMagic.XML, FileMagic.valueOf("XML"));
        assertEquals(FileMagic.XML, FileMagic.valueOf("<?xml".getBytes(StandardCharsets.UTF_8)));

        assertEquals(FileMagic.HTML, FileMagic.valueOf("HTML"));
        assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYP".getBytes(StandardCharsets.UTF_8)));
        assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYPE".getBytes(StandardCharsets.UTF_8)));
        assertEquals(FileMagic.HTML, FileMagic.valueOf("<html".getBytes(StandardCharsets.UTF_8)));

        try {
            // a name that matches no constant is expected to be rejected
            FileMagic.valueOf("some string");
            fail("Should catch exception here");
        } catch (IllegalArgumentException expected) {
            // expected here
        }
    }

    /**
     * Checks detection directly from the sample spreadsheet files.
     *
     * @throws IOException if a sample file cannot be read
     */
    @Test
    public void testFileMagicFile() throws IOException {
        assertEquals(FileMagic.OLE2, FileMagic.valueOf(POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xls")));
        assertEquals(FileMagic.OOXML, FileMagic.valueOf(POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xlsx")));
    }

    /**
     * Checks detection from buffered streams opened on the sample spreadsheet files.
     *
     * @throws IOException if a sample file cannot be opened or read
     */
    @Test
    public void testFileMagicStream() throws IOException {
        try (InputStream stream = new BufferedInputStream(new FileInputStream(POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xls")))) {
            assertEquals(FileMagic.OLE2, FileMagic.valueOf(stream));
        }

        try (InputStream stream = new BufferedInputStream(new FileInputStream(POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xlsx")))) {
            assertEquals(FileMagic.OOXML, FileMagic.valueOf(stream));
        }
    }

    /**
     * Checks {@code FileMagic.prepareToCheckMagic}: a {@link BufferedInputStream}
     * is expected to be returned as-is, while a bare {@link InputStream} is
     * expected to come back as a different stream object.
     * NOTE(review): presumably the bare stream is wrapped because it lacks
     * mark/reset support - confirm against FileMagic.
     *
     * @throws IOException if the sample file cannot be opened or a stream fails to close
     */
    @Test
    public void testPrepare() throws IOException {
        try (InputStream stream = new BufferedInputStream(new FileInputStream(POIDataSamples.getSpreadSheetInstance().getFile("SampleSS.xlsx")))) {
            assertSame(stream, FileMagic.prepareToCheckMagic(stream));
        }

        // minimal stream that only implements the mandatory read() method
        try (InputStream stream = new InputStream() {
            @Override
            public int read() {
                return 0;
            }
        }) {
            assertNotSame(stream, FileMagic.prepareToCheckMagic(stream));
        }
    }
}