diff --git a/CHANGES.txt b/CHANGES.txt index bcfcf918efb..a595497bc5c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -514,6 +514,9 @@ Bug Fixes delimiters, and could additionally lose big position increments. (Robert Muir, yonik +61. SOLR-1091: Jetty's use of CESU-8 for code points outside the BMP + resulted in invalid output from the serialized PHP writer. (yonik) + Other Changes ---------------------- 1. Upgraded to Lucene 2.4.0 (yonik) diff --git a/src/java/org/apache/solr/request/PHPSerializedResponseWriter.java b/src/java/org/apache/solr/request/PHPSerializedResponseWriter.java index 25f26b40294..1049dca1d8d 100755 --- a/src/java/org/apache/solr/request/PHPSerializedResponseWriter.java +++ b/src/java/org/apache/solr/request/PHPSerializedResponseWriter.java @@ -23,6 +23,7 @@ import java.util.*; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.common.util.NamedList; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.DocIterator; @@ -36,13 +37,14 @@ import org.apache.solr.search.SolrIndexSearcher; public class PHPSerializedResponseWriter implements QueryResponseWriter { static String CONTENT_TYPE_PHP_UTF8="text/x-php-serialized;charset=UTF-8"; + static boolean modifiedUTF8 = System.getProperty("jetty.home") != null; public void init(NamedList n) { /* NOOP */ } public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException { - PHPSerializedWriter w = new PHPSerializedWriter(writer, req, rsp); + PHPSerializedWriter w = new PHPSerializedWriter(writer, req, rsp, modifiedUTF8); try { w.writeResponse(); } finally { @@ -56,8 +58,13 @@ public class PHPSerializedResponseWriter implements QueryResponseWriter { } class PHPSerializedWriter extends JSONWriter { - public PHPSerializedWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) { + final private boolean modifiedUTF8; + final UnicodeUtil.UTF8Result utf8; + + public PHPSerializedWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp, boolean modifiedUTF8) { super(writer, req, rsp); + this.modifiedUTF8 = modifiedUTF8; + this.utf8 = modifiedUTF8 ? null : new UnicodeUtil.UTF8Result(); // never indent serialized PHP data doIndent = false; } @@ -273,6 +280,28 @@ class PHPSerializedWriter extends JSONWriter { public void writeStr(String name, String val, boolean needsEscaping) throws IOException { // serialized PHP strings don't need to be escaped at all, however the // string size reported needs be the number of bytes rather than chars. - writer.write("s:"+val.getBytes("UTF8").length+":\""+val+"\";"); + int nBytes; + if (modifiedUTF8) { + nBytes = 0; + for (int i=0; i