mirror of https://github.com/apache/poi.git
Fix bug #52540 - Relax the M4.1 constraint on reading OOXML files, as some Office-produced files do contain two Core Properties parts, despite the specification explicitly forbidding this
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1237631 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
91f163c97a
commit
bcb898e977
|
@ -34,6 +34,7 @@
|
||||||
|
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.8-beta6" date="2012-??-??">
|
<release version="3.8-beta6" date="2012-??-??">
|
||||||
|
<action dev="poi-developers" type="fix">52540 - Relax the M4.1 constraint on reading OOXML files, as some Office-produced files do contain two Core Properties parts, despite the specification explicitly forbidding this</action>
|
||||||
<action dev="poi-developers" type="add">52462 - Added implementation for SUMIFS()</action>
|
<action dev="poi-developers" type="add">52462 - Added implementation for SUMIFS()</action>
|
||||||
<action dev="poi-developers" type="add">POIXMLPropertiesTextExtractor support for extracting custom OOXML properties as text</action>
|
<action dev="poi-developers" type="add">POIXMLPropertiesTextExtractor support for extracting custom OOXML properties as text</action>
|
||||||
<action dev="poi-developers" type="fix">52449 - Support writing XWPF documents with glossaries (Glossaries are not yet supported, but can now be written out again without changes)</action>
|
<action dev="poi-developers" type="fix">52449 - Support writing XWPF documents with glossaries (Glossaries are not yet supported, but can now be written out again without changes)</action>
|
||||||
|
|
|
@ -597,8 +597,13 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load the parts of the archive if it has not been done yet The
|
* Load the parts of the archive if it has not been done yet. The
|
||||||
* relationships of each part are not loaded
|
* relationships of each part are not loaded.
|
||||||
|
*
|
||||||
|
* Note - Rule M4.1 states that there may only ever be one Core
|
||||||
|
* Properties Part, but Office produced files will sometimes
|
||||||
|
* have multiple! As Office ignores all but the first, we relax
|
||||||
|
* compliance with Rule M4.1, and ignore all others too (logging a warning).
|
||||||
*
|
*
|
||||||
* @return All this package's parts.
|
* @return All this package's parts.
|
||||||
*/
|
*/
|
||||||
|
@ -609,31 +614,36 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
|
||||||
if (partList == null) {
|
if (partList == null) {
|
||||||
/* Variables used to validate OPC Compliance */
|
/* Variables used to validate OPC Compliance */
|
||||||
|
|
||||||
// Ensure rule M4.1 -> A format consumer shall consider more than
|
// Check rule M4.1 -> A format consumer shall consider more than
|
||||||
// one core properties relationship for a package to be an error
|
// one core properties relationship for a package to be an error
|
||||||
|
// (We just log it and move on, as real files break this!)
|
||||||
boolean hasCorePropertiesPart = false;
|
boolean hasCorePropertiesPart = false;
|
||||||
|
boolean needCorePropertiesPart = true;
|
||||||
|
|
||||||
PackagePart[] parts = this.getPartsImpl();
|
PackagePart[] parts = this.getPartsImpl();
|
||||||
this.partList = new PackagePartCollection();
|
this.partList = new PackagePartCollection();
|
||||||
for (PackagePart part : parts) {
|
for (PackagePart part : parts) {
|
||||||
if (partList.containsKey(part._partName))
|
if (partList.containsKey(part._partName))
|
||||||
throw new InvalidFormatException(
|
throw new InvalidFormatException(
|
||||||
"A part with the name '"
|
"A part with the name '" +
|
||||||
+ part._partName
|
part._partName +
|
||||||
+ "' already exist : Packages shall not contain equivalent part names and package implementers shall neither create nor recognize packages with equivalent part names. [M1.12]");
|
"' already exist : Packages shall not contain equivalent " +
|
||||||
|
"part names and package implementers shall neither create " +
|
||||||
|
"nor recognize packages with equivalent part names. [M1.12]");
|
||||||
|
|
||||||
// Check OPC compliance rule M4.1
|
// Check OPC compliance rule M4.1
|
||||||
if (part.getContentType().equals(
|
if (part.getContentType().equals(
|
||||||
ContentTypes.CORE_PROPERTIES_PART)) {
|
ContentTypes.CORE_PROPERTIES_PART)) {
|
||||||
if (!hasCorePropertiesPart)
|
if (!hasCorePropertiesPart) {
|
||||||
hasCorePropertiesPart = true;
|
hasCorePropertiesPart = true;
|
||||||
else
|
} else {
|
||||||
throw new InvalidFormatException(
|
logger.log(POILogger.WARN, "OPC Compliance error [M4.1]: " +
|
||||||
"OPC Compliance error [M4.1]: there is more than one core properties relationship in the package !");
|
"there is more than one core properties relationship in the package! " +
|
||||||
|
"POI will use only the first, but other software may reject this file.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PartUnmarshaller partUnmarshaller = partUnmarshallers
|
PartUnmarshaller partUnmarshaller = partUnmarshallers.get(part._contentType);
|
||||||
.get(part._contentType);
|
|
||||||
|
|
||||||
if (partUnmarshaller != null) {
|
if (partUnmarshaller != null) {
|
||||||
UnmarshallContext context = new UnmarshallContext(this,
|
UnmarshallContext context = new UnmarshallContext(this,
|
||||||
|
@ -643,9 +653,14 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
|
||||||
.unmarshall(context, part.getInputStream());
|
.unmarshall(context, part.getInputStream());
|
||||||
partList.put(unmarshallPart._partName, unmarshallPart);
|
partList.put(unmarshallPart._partName, unmarshallPart);
|
||||||
|
|
||||||
// Core properties case
|
// Core properties case-- use first CoreProperties part we come across
|
||||||
if (unmarshallPart instanceof PackagePropertiesPart)
|
// and ignore any subsequent ones
|
||||||
|
if (unmarshallPart instanceof PackagePropertiesPart &&
|
||||||
|
hasCorePropertiesPart &&
|
||||||
|
needCorePropertiesPart) {
|
||||||
this.packageProperties = (PackagePropertiesPart) unmarshallPart;
|
this.packageProperties = (PackagePropertiesPart) unmarshallPart;
|
||||||
|
needCorePropertiesPart = false;
|
||||||
|
}
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
logger.log(POILogger.WARN, "Unmarshall operation : IOException for "
|
logger.log(POILogger.WARN, "Unmarshall operation : IOException for "
|
||||||
+ part._partName);
|
+ part._partName);
|
||||||
|
@ -718,19 +733,20 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
|
||||||
if (partList.containsKey(partName)
|
if (partList.containsKey(partName)
|
||||||
&& !partList.get(partName).isDeleted()) {
|
&& !partList.get(partName).isDeleted()) {
|
||||||
throw new PartAlreadyExistsException(
|
throw new PartAlreadyExistsException(
|
||||||
"A part with the name '"
|
"A part with the name '" + partName.getName() + "'" +
|
||||||
+ partName.getName()
|
" already exists : Packages shall not contain equivalent part names and package" +
|
||||||
+ "' already exists : Packages shall not contain equivalent part names and package implementers shall neither create nor recognize packages with equivalent part names. [M1.12]");
|
" implementers shall neither create nor recognize packages with equivalent part names. [M1.12]");
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check OPC compliance */
|
/* Check OPC compliance */
|
||||||
|
|
||||||
// Rule [M4.1]: The format designer shall specify and the format
|
// Rule [M4.1]: The format designer shall specify and the format producer
|
||||||
// producer
|
|
||||||
// shall create at most one core properties relationship for a package.
|
// shall create at most one core properties relationship for a package.
|
||||||
// A format consumer shall consider more than one core properties
|
// A format consumer shall consider more than one core properties
|
||||||
// relationship for a package to be an error. If present, the
|
// relationship for a package to be an error. If present, the
|
||||||
// relationship shall target the Core Properties part.
|
// relationship shall target the Core Properties part.
|
||||||
|
// Note - POI will read files with more than one Core Properties, which
|
||||||
|
// Office sometimes produces, but is strict on generation
|
||||||
if (contentType.equals(ContentTypes.CORE_PROPERTIES_PART)) {
|
if (contentType.equals(ContentTypes.CORE_PROPERTIES_PART)) {
|
||||||
if (this.packageProperties != null)
|
if (this.packageProperties != null)
|
||||||
throw new InvalidOperationException(
|
throw new InvalidOperationException(
|
||||||
|
|
|
@ -25,6 +25,7 @@ import java.net.URISyntaxException;
|
||||||
import junit.framework.AssertionFailedError;
|
import junit.framework.AssertionFailedError;
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.poi.POIDataSamples;
|
||||||
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
|
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
|
||||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||||
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
|
||||||
|
@ -33,7 +34,6 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||||
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
||||||
import org.apache.poi.openxml4j.opc.TargetMode;
|
import org.apache.poi.openxml4j.opc.TargetMode;
|
||||||
import org.apache.poi.POIDataSamples;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test core properties Open Packaging Convention compliance.
|
* Test core properties Open Packaging Convention compliance.
|
||||||
|
@ -42,6 +42,7 @@ import org.apache.poi.POIDataSamples;
|
||||||
* at most one core properties relationship for a package. A format consumer
|
* at most one core properties relationship for a package. A format consumer
|
||||||
* shall consider more than one core properties relationship for a package to be
|
* shall consider more than one core properties relationship for a package to be
|
||||||
* an error. If present, the relationship shall target the Core Properties part.
|
* an error. If present, the relationship shall target the Core Properties part.
|
||||||
|
* (POI relaxes this on reading, as Office sometimes breaks this)
|
||||||
*
|
*
|
||||||
* M4.2: The format designer shall not specify and the format producer shall not
|
* M4.2: The format designer shall not specify and the format producer shall not
|
||||||
* create Core Properties that use the Markup Compatibility namespace as defined
|
* create Core Properties that use the Markup Compatibility namespace as defined
|
||||||
|
@ -82,28 +83,43 @@ public final class TestOPCComplianceCoreProperties extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static String extractInvalidFormatMessage(String sampleNameSuffix) {
|
private static String extractInvalidFormatMessage(String sampleNameSuffix) {
|
||||||
|
|
||||||
InputStream is = OpenXML4JTestDataSamples.openComplianceSampleStream("OPCCompliance_CoreProperties_" + sampleNameSuffix);
|
InputStream is = OpenXML4JTestDataSamples.openComplianceSampleStream("OPCCompliance_CoreProperties_" + sampleNameSuffix);
|
||||||
OPCPackage pkg;
|
OPCPackage pkg;
|
||||||
try {
|
try {
|
||||||
pkg = OPCPackage.open(is);
|
pkg = OPCPackage.open(is);
|
||||||
} catch (InvalidFormatException e) {
|
} catch (InvalidFormatException e) {
|
||||||
// expected during successful test
|
// no longer required for successful test
|
||||||
return e.getMessage();
|
return e.getMessage();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
}
|
}
|
||||||
pkg.revert();
|
pkg.revert();
|
||||||
// Normally must thrown an InvalidFormatException exception.
|
|
||||||
throw new AssertionFailedError("expected OPC compliance exception was not thrown");
|
throw new AssertionFailedError("expected OPC compliance exception was not thrown");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test M4.1 rule.
|
* Test M4.1 rule.
|
||||||
*/
|
*/
|
||||||
public void testOnlyOneCorePropertiesPart() {
|
public void testOnlyOneCorePropertiesPart() throws Exception {
|
||||||
String msg = extractInvalidFormatMessage("OnlyOneCorePropertiesPartFAIL.docx");
|
// We have relaxed this check, so we can read the file anyway
|
||||||
assertEquals("OPC Compliance error [M4.1]: there is more than one core properties relationship in the package !", msg);
|
try {
|
||||||
|
extractInvalidFormatMessage("OnlyOneCorePropertiesPartFAIL.docx");
|
||||||
|
fail("M4.1 should be being relaxed");
|
||||||
|
} catch (AssertionFailedError e) {}
|
||||||
|
|
||||||
|
// We will use the first core properties, and ignore the others
|
||||||
|
InputStream is = OpenXML4JTestDataSamples.openSampleStream("MultipleCoreProperties.docx");
|
||||||
|
OPCPackage pkg = OPCPackage.open(is);
|
||||||
|
|
||||||
|
// We can see 2 by type
|
||||||
|
assertEquals(2, pkg.getPartsByContentType(ContentTypes.CORE_PROPERTIES_PART).size());
|
||||||
|
// But only the first one by relationship
|
||||||
|
assertEquals(1, pkg.getPartsByRelationshipType(PackageRelationshipTypes.CORE_PROPERTIES).size());
|
||||||
|
// It should be core.xml not the older core1.xml
|
||||||
|
assertEquals(
|
||||||
|
"/docProps/core.xml",
|
||||||
|
pkg.getPartsByRelationshipType(PackageRelationshipTypes.CORE_PROPERTIES).get(0).getPartName().toString()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static URI createURI(String text) {
|
private static URI createURI(String text) {
|
||||||
|
@ -131,7 +147,8 @@ public final class TestOPCComplianceCoreProperties extends TestCase {
|
||||||
try {
|
try {
|
||||||
pkg.addRelationship(PackagingURIHelper.createPartName(partUri), TargetMode.INTERNAL,
|
pkg.addRelationship(PackagingURIHelper.createPartName(partUri), TargetMode.INTERNAL,
|
||||||
PackageRelationshipTypes.CORE_PROPERTIES);
|
PackageRelationshipTypes.CORE_PROPERTIES);
|
||||||
fail("expected OPC compliance exception was not thrown");
|
// no longer fail on compliance error
|
||||||
|
//fail("expected OPC compliance exception was not thrown");
|
||||||
} catch (InvalidFormatException e) {
|
} catch (InvalidFormatException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
} catch (InvalidOperationException e) {
|
} catch (InvalidOperationException e) {
|
||||||
|
@ -157,7 +174,8 @@ public final class TestOPCComplianceCoreProperties extends TestCase {
|
||||||
try {
|
try {
|
||||||
pkg.createPart(PackagingURIHelper.createPartName(partUri),
|
pkg.createPart(PackagingURIHelper.createPartName(partUri),
|
||||||
ContentTypes.CORE_PROPERTIES_PART);
|
ContentTypes.CORE_PROPERTIES_PART);
|
||||||
fail("expected OPC compliance exception was not thrown");
|
// no longer fail on compliance error
|
||||||
|
//fail("expected OPC compliance exception was not thrown");
|
||||||
} catch (InvalidFormatException e) {
|
} catch (InvalidFormatException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
} catch (InvalidOperationException e) {
|
} catch (InvalidOperationException e) {
|
||||||
|
|
Binary file not shown.
Loading…
Reference in New Issue