allow white spaces and unicode in OPC relationship targets, see Bugzilla 50154

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1036215 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2010-11-17 20:40:35 +00:00
parent b621f02de0
commit 5f11a63d37
8 changed files with 173 additions and 22 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta1" date="2010-??-??">
<action dev="poi-developers" type="fix">50154 - Allow white spaces and unicode in OPC relationship targets </action>
<action dev="poi-developers" type="fix">50113 - Remove cell from Calculation Chain after setting cell type to blank </action>
<action dev="poi-developers" type="fix">49966 - Ensure that XSSFRow#removeCell clears calculation chain entries </action>
<action dev="poi-developers" type="fix">50096 - Fixed evaluation of cell references with column index greater than 255 </action>

View File

@ -351,16 +351,8 @@ public final class PackageRelationshipCollection implements
PackageRelationship.TARGET_ATTRIBUTE_NAME)
.getValue();
if (value.indexOf("\\") != -1) {
logger
.log(POILogger.INFO, "target contains \\ therefore not a valid URI"
+ value + " replaced by /");
value = value.replaceAll("\\\\", "/");
// word can save external relationship with a \ instead
// of /
}
target = PackagingURIHelper.toURI(value);
target = new URI(value);
} catch (URISyntaxException e) {
logger.log(POILogger.ERROR, "Cannot convert " + value
+ " in a valid relationship URI-> ignored", e);

View File

@ -75,6 +75,11 @@ public interface PackageRelationshipTypes {
*/
String IMAGE_PART = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";
/**
* Hyperlink type.
*/
String HYPERLINK_PART = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
/**
* Style type.
*/

View File

@ -19,6 +19,8 @@ package org.apache.poi.openxml4j.opc;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.io.UnsupportedEncodingException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
@ -287,7 +289,7 @@ public final class PackagingURIHelper {
// form must actually be an absolute URI
if(sourceURI.toString().equals("/")) {
String path = targetURI.getPath();
if(msCompatible && path.charAt(0) == '/') {
if(msCompatible && path.length() > 0 && path.charAt(0) == '/') {
try {
targetURI = new URI(path.substring(1));
} catch (Exception e) {
@ -362,6 +364,12 @@ public final class PackagingURIHelper {
}
}
// if the target had a fragment then append it to the result
String fragment = targetURI.getRawFragment();
if (fragment != null) {
retVal.append("#").append(fragment);
}
try {
return new URI(retVal.toString());
} catch (Exception e) {
@ -412,9 +420,9 @@ public final class PackagingURIHelper {
* Get URI from a string path.
*/
public static URI getURIFromPath(String path) {
URI retUri = null;
URI retUri;
try {
retUri = new URI(path);
retUri = toURI(path);
} catch (URISyntaxException e) {
throw new IllegalArgumentException("path");
}
@ -484,7 +492,7 @@ public final class PackagingURIHelper {
throws InvalidFormatException {
URI partNameURI;
try {
partNameURI = new URI(resolvePartName(partName));
partNameURI = toURI(partName);
} catch (URISyntaxException e) {
throw new InvalidFormatException(e.getMessage());
}
@ -648,7 +656,9 @@ public final class PackagingURIHelper {
}
/**
* If part name is not a valid URI, it is resolved as follows:
* Convert a string to {@link java.net.URI}
*
* If part name is not a valid URI, it is resolved as follows:
* <p>
* 1. Percent-encode each open bracket ([) and close bracket (]).
* 2. Percent-encode each percent (%) character that is not followed by a hexadecimal notation of an octet value.
@ -663,12 +673,72 @@ public final class PackagingURIHelper {
* in section 5.2 of RFC 3986. The path component of the resulting absolute URI is the part name.
*</p>
*
* @param partName the name to resolve
* @param value the string to be parsed into a URI
* @return the resolved part name that should be OK to construct a URI
*
* TODO YK: for now this method does only (5). Finish the rest.
*/
public static String resolvePartName(String partName){
return partName.replace('\\', '/');
public static URI toURI(String value) throws URISyntaxException {
    // Rule 5: every back slash becomes a forward slash
    // (replace() hands back the same text when there is nothing to swap)
    String normalized = value.replace('\\', '/');

    // Everything after the first '#' is treated as the fragment. It can arrive
    // with raw white space or unicode characters in it, so that part is passed
    // through encode() before the string is handed to the URI constructor.
    int hashPos = normalized.indexOf('#');
    if (hashPos >= 0) {
        String beforeHash = normalized.substring(0, hashPos);
        String afterHash = normalized.substring(hashPos + 1);
        normalized = beforeHash + "#" + encode(afterHash);
    }

    return new URI(normalized);
}
/**
 * Percent-encode white space and every non-ASCII character of a string.
 * <p>
 * The string is converted to UTF-8 and each resulting byte that is either
 * ASCII white space or outside the ASCII range (0x80 and above) is written
 * as <code>%XX</code> with upper-case hex digits. All other bytes, including
 * percent signs that are already present, are copied unchanged.
 * </p>
 * Examples:
 * <pre>
 * 'Apache POI'      --&gt; 'Apache%20POI'
 * 'Apache\u0410POI' --&gt; 'Apache%D0%90POI'
 * </pre>
 *
 * @param s the string to encode, must not be <code>null</code>
 * @return the encoded string; the argument itself when it is empty
 */
public static String encode(String s) {
    if (s.length() == 0) {
        return s;
    }

    byte[] utf8;
    try {
        utf8 = s.getBytes("UTF-8");
    } catch (UnsupportedEncodingException e) {
        // should not happen: UTF-8 is a charset every Java platform must provide
        throw new RuntimeException(e);
    }

    StringBuilder sb = new StringBuilder(utf8.length);
    for (byte raw : utf8) {
        // widen to an unsigned value so that bytes >= 0x80 are not negative
        int b = raw & 0xff;
        if (isUnsafe(b)) {
            sb.append('%');
            sb.append(hexDigits[(b >> 4) & 0x0F]);
            sb.append(hexDigits[b & 0x0F]);
        } else {
            sb.append((char) b);
        }
    }
    return sb.toString();
}

/** Upper-case hexadecimal digits, indexed by nibble value. */
private final static char[] hexDigits = {
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};

/**
 * Tells whether a single UTF-8 byte has to be percent-encoded.
 *
 * @param ch the byte as an unsigned value (0..255)
 * @return <code>true</code> for white space and for any byte outside the ASCII range
 */
private static boolean isUnsafe(int ch) {
    // 0x80 itself must be included: it is a legal UTF-8 continuation byte
    // (e.g. U+0400 is D0 80) and would otherwise leak into the URI as a raw control character
    return ch >= 0x80 || Character.isWhitespace(ch);
}
}

View File

@ -163,10 +163,7 @@ public final class ZipPartMarshaller implements PartMarshaller {
} else {
URI targetURI = rel.getTargetURI();
targetValue = PackagingURIHelper.relativizeURI(
sourcePartURI, targetURI, true).getPath();
if (targetURI.getRawFragment() != null) {
targetValue += "#" + targetURI.getRawFragment();
}
sourcePartURI, targetURI, true).toString();
}
relElem.addAttribute(PackageRelationship.TARGET_ATTRIBUTE_NAME,
targetValue);

View File

@ -17,6 +17,7 @@
package org.apache.poi.openxml4j.opc;
import java.net.URI;
import java.net.URISyntaxException;
import junit.framework.TestCase;
@ -35,7 +36,9 @@ public class TestPackagingURIHelper extends TestCase {
public void testRelativizeURI() throws Exception {
URI uri1 = new URI("/word/document.xml");
URI uri2 = new URI("/word/media/image1.gif");
URI uri3 = new URI("/word/media/image1.gif#Sheet1!A1");
URI uri4 = new URI("#'My%20Sheet1'!A1");
// Document to image is down a directory
URI retURI1to2 = PackagingURIHelper.relativizeURI(uri1, uri2);
assertEquals("media/image1.gif", retURI1to2.getPath());
@ -60,6 +63,12 @@ public class TestPackagingURIHelper extends TestCase {
//URI compatible with MS Office and OpenOffice: leading slash is removed
uriRes = PackagingURIHelper.relativizeURI(root, uri1, true);
assertEquals("word/document.xml", uriRes.toString());
//preserve URI fragments
uriRes = PackagingURIHelper.relativizeURI(uri1, uri3, true);
assertEquals("media/image1.gif#Sheet1!A1", uriRes.toString());
uriRes = PackagingURIHelper.relativizeURI(root, uri4, true);
assertEquals("#'My%20Sheet1'!A1", uriRes.toString());
}
/**
@ -104,4 +113,22 @@ public class TestPackagingURIHelper extends TestCase {
.equals(relativeName));
pkg.revert();
}
/**
 * Checks that {@link PackagingURIHelper#toURI(String)} accepts relationship
 * targets containing back slashes, percent escapes, a <code>file:</code>
 * scheme and a fragment with white space.
 */
public void testCreateURIFromString() throws Exception {
    String[] href = {
        "..\\\\\\cygwin\\home\\yegor\\.vim\\filetype.vim",
        "..\\Program%20Files\\AGEIA%20Technologies\\v2.3.3\\NxCooking.dll",
        "file:///D:\\seva\\1981\\r810102ns.mp3",
        "..\\cygwin\\home\\yegor\\dinom\\%5baccess%5d.2010-10-26.log",
        "#'Instructions (Text)'!B21"
    };
    for (String s : href) {
        try {
            // only successful parsing matters here, the URI itself is not inspected
            PackagingURIHelper.toURI(s);
        } catch (URISyntaxException e) {
            // keep the parser's reason so a failure can be diagnosed from the report
            fail("Failed to create URI from " + s + ": " + e.getMessage());
        }
    }
}
}

View File

@ -18,6 +18,7 @@
package org.apache.poi.openxml4j.opc;
import java.io.*;
import java.net.URI;
import junit.framework.TestCase;
@ -254,4 +255,62 @@ public class TestRelationships extends TestCase {
pkg.getRelationshipsByType("http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties").getRelationship(0).getTargetURI().toString());
}
/**
 * Runs the bug 50154 checks twice: on the sample package as opened from
 * disk, and again on a copy that was saved to memory and re-opened.
 */
public void testTargetWithSpecialChars() throws Exception {
    // first pass: the package exactly as stored in the sample file
    String samplePath = OpenXML4JTestDataSamples.getSampleFileName("50154.xlsx");
    OPCPackage fromDisk = OPCPackage.open(samplePath);
    assert_50154(fromDisk);

    // second pass: write the package into a buffer and read it back
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    fromDisk.save(buffer);
    ByteArrayInputStream savedBytes = new ByteArrayInputStream(buffer.toByteArray());
    OPCPackage reopened = OPCPackage.open(savedBytes);
    assert_50154(reopened);
}
/**
 * Verifies the relationships of <code>/xl/drawings/drawing1.xml</code> in the
 * bug 50154 sample: their number per type and the targets of rId1 to rId6.
 *
 * @param pkg the package to inspect
 */
public void assert_50154(OPCPackage pkg) throws Exception {
    PackagePart drawing = pkg.getPart(
            PackagingURIHelper.createPartName(new URI("/xl/drawings/drawing1.xml")));

    PackageRelationshipCollection allRels = drawing.getRelationships();
    assertEquals(6, allRels.size());
    // one image relationship ...
    assertEquals(1, drawing.getRelationshipsByType(
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image").size());
    // ... and five hyperlink relationships
    assertEquals(5, drawing.getRelationshipsByType(
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink").size());

    // rId1: fragment containing a white space
    PackageRelationship first = drawing.getRelationship("rId1");
    URI base = drawing.getPartName().getURI();
    URI firstTarget = base.relativize(first.getTargetURI());
    // the result is discarded; the call only has to complete without throwing
    PackagingURIHelper.relativizeURI(drawing.getPartName().getURI(), first.getTargetURI());
    assertEquals("'Another Sheet'!A1", firstTarget.getFragment());

    // rId2: the image, addressed relative to the drawing part
    PackageRelationship second = drawing.getRelationship("rId2");
    URI secondTarget = PackagingURIHelper.relativizeURI(
            drawing.getPartName().getURI(), second.getTargetURI());
    assertEquals("../media/image1.png", secondTarget.getPath());

    // rId3: plain fragment
    URI thirdTarget = base.relativize(drawing.getRelationship("rId3").getTargetURI());
    assertEquals("ThirdSheet!A1", thirdTarget.getFragment());

    // rId4: fragment with unicode characters and a white space
    URI fourthTarget = base.relativize(drawing.getRelationship("rId4").getTargetURI());
    assertEquals("'\u0410\u043F\u0430\u0447\u0435 \u041F\u041E\u0418'!A1", fourthTarget.getFragment());

    // rId5: back slashes have been replaced with forward slashes
    URI fifthTarget = base.relativize(drawing.getRelationship("rId5").getTargetURI());
    assertEquals("file:///D:/chan-chan.mp3", fifthTarget.toString());

    // rId6: both the path and the fragment are checked
    URI sixthTarget = base.relativize(drawing.getRelationship("rId6").getTargetURI());
    assertEquals("../../../../../../../cygwin/home/yegor/dinom/&&&[access].2010-10-26.log", sixthTarget.getPath());
    assertEquals("'\u0410\u043F\u0430\u0447\u0435 \u041F\u041E\u0418'!A5", sixthTarget.getFragment());
}
}

Binary file not shown.