diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java index d74e0a13ef..c4169d9dbf 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java @@ -484,7 +484,7 @@ public final class PackagingURIHelper { throws InvalidFormatException { URI partNameURI; try { - partNameURI = new URI(partName); + partNameURI = new URI(resolvePartName(partName)); } catch (URISyntaxException e) { throw new InvalidFormatException(e.getMessage()); } @@ -646,4 +646,29 @@ public final class PackagingURIHelper { } return retPartName; } + + /** + * If part name is not a valid URI, it is resolved as follows: + *

+ * 1. Percent-encode each open bracket ([) and close bracket (]). + * 2. Percent-encode each percent (%) character that is not followed by a hexadecimal notation of an octet value. + * 3. Un-percent-encode each percent-encoded unreserved character. + * 4. Un-percent-encode each forward slash (/) and back slash (\). + * 5. Convert all back slashes to forward slashes. + * 6. If present in a segment containing non-dot (".") characters, remove trailing dot (".") characters from each segment. + * 7. Replace each occurrence of multiple consecutive forward slashes (/) with a single forward slash. + * 8. If a single trailing forward slash (/) is present, remove that trailing forward slash. + * 9. Remove complete segments that consist of three or more dots. + * 10. Resolve the relative reference against the base URI of the part holding the Unicode string, as it is defined + * in section 5.2 of RFC 3986. The path component of the resulting absolute URI is the part name. + *

+ * + * @param partName the name to resolve + * @return the resolved part name that should be OK to construct a URI + * + * TODO YK: for now this method does only (5). Finish the rest. + */ + public static String resolvePartName(String partName){ + return partName.replace('\\', '/'); + } } diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java index 2e211a64d1..114c75c06e 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java @@ -130,7 +130,7 @@ public final class ZipPackage extends Package { Enumeration entries = this.zipArchive.getEntries(); while (entries.hasMoreElements()) { ZipEntry entry = entries.nextElement(); - if (entry.getName().equals( + if (entry.getName().equalsIgnoreCase( ContentTypeManager.CONTENT_TYPES_PART_NAME)) { try { this.contentTypeManager = new ZipContentTypeManager( @@ -208,7 +208,7 @@ public final class ZipPackage extends Package { try { // We get an error when we parse [Content_Types].xml // because it's not a valid URI. - if (entry.getName().equals( + if (entry.getName().equalsIgnoreCase( ContentTypeManager.CONTENT_TYPES_PART_NAME)) { return null; } @@ -218,7 +218,7 @@ public final class ZipPackage extends Package { // We assume we can continue, even in degraded mode ... 
logger.log(POILogger.WARN,"Entry " + entry.getName() - + " is not valid, so this part won't be add to the package."); + + " is not valid, so this part won't be add to the package.", e); return null; } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java index 504eceb0ac..5a6e250d9e 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java @@ -397,4 +397,19 @@ public final class TestXSSFBugs extends BaseTestBugzillaIssues { } catch(IllegalStateException e) {} } } + + /** + * A problem file from a non-standard source (a scientific instrument that saves its + * output as an .xlsx file) that have two issues: + * 1. The Content Type part name is lower-case: [content_types].xml + * 2. The file appears to use backslashes as path separators + * + * The OPC spec tolerates both of these peculiarities, so does POI + */ + public void test49609() throws Exception { + XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("49609.xlsx"); + assertEquals("FAM", wb.getSheetName(0)); + assertEquals("Cycle", wb.getSheetAt(0).getRow(0).getCell(1).getStringCellValue()); + + } } diff --git a/test-data/spreadsheet/49609.xlsx b/test-data/spreadsheet/49609.xlsx new file mode 100755 index 0000000000..03d9d12ca2 Binary files /dev/null and b/test-data/spreadsheet/49609.xlsx differ