mirror of https://github.com/apache/lucene.git
SOLR-1091: support CESU-8 for jetty in serialized PHP writer
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@808988 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
89b87c32d2
commit
b995d3d9c0
|
@ -514,6 +514,9 @@ Bug Fixes
|
||||||
delimiters, and could additionally lose big position increments.
|
delimiters, and could additionally lose big position increments.
|
||||||
(Robert Muir, yonik)
|
(Robert Muir, yonik)
|
||||||
|
|
||||||
|
61. SOLR-1091: Jetty's use of CESU-8 for code points outside the BMP
|
||||||
|
resulted in invalid output from the serialized PHP writer. (yonik)
|
||||||
|
|
||||||
Other Changes
|
Other Changes
|
||||||
----------------------
|
----------------------
|
||||||
1. Upgraded to Lucene 2.4.0 (yonik)
|
1. Upgraded to Lucene 2.4.0 (yonik)
|
||||||
|
|
|
@ -23,6 +23,7 @@ import java.util.*;
|
||||||
|
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
import org.apache.lucene.util.UnicodeUtil;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.search.DocIterator;
|
import org.apache.solr.search.DocIterator;
|
||||||
|
@ -36,13 +37,14 @@ import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
|
||||||
public class PHPSerializedResponseWriter implements QueryResponseWriter {
|
public class PHPSerializedResponseWriter implements QueryResponseWriter {
|
||||||
static String CONTENT_TYPE_PHP_UTF8="text/x-php-serialized;charset=UTF-8";
|
static String CONTENT_TYPE_PHP_UTF8="text/x-php-serialized;charset=UTF-8";
|
||||||
|
static boolean modifiedUTF8 = System.getProperty("jetty.home") != null;
|
||||||
|
|
||||||
public void init(NamedList n) {
|
public void init(NamedList n) {
|
||||||
/* NOOP */
|
/* NOOP */
|
||||||
}
|
}
|
||||||
|
|
||||||
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
|
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
|
||||||
PHPSerializedWriter w = new PHPSerializedWriter(writer, req, rsp);
|
PHPSerializedWriter w = new PHPSerializedWriter(writer, req, rsp, modifiedUTF8);
|
||||||
try {
|
try {
|
||||||
w.writeResponse();
|
w.writeResponse();
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -56,8 +58,13 @@ public class PHPSerializedResponseWriter implements QueryResponseWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
class PHPSerializedWriter extends JSONWriter {
|
class PHPSerializedWriter extends JSONWriter {
|
||||||
public PHPSerializedWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) {
|
final private boolean modifiedUTF8;
|
||||||
|
final UnicodeUtil.UTF8Result utf8;
|
||||||
|
|
||||||
|
public PHPSerializedWriter(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp, boolean modifiedUTF8) {
|
||||||
super(writer, req, rsp);
|
super(writer, req, rsp);
|
||||||
|
this.modifiedUTF8 = modifiedUTF8;
|
||||||
|
this.utf8 = modifiedUTF8 ? null : new UnicodeUtil.UTF8Result();
|
||||||
// never indent serialized PHP data
|
// never indent serialized PHP data
|
||||||
doIndent = false;
|
doIndent = false;
|
||||||
}
|
}
|
||||||
|
@ -273,6 +280,28 @@ class PHPSerializedWriter extends JSONWriter {
|
||||||
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
|
public void writeStr(String name, String val, boolean needsEscaping) throws IOException {
|
||||||
// serialized PHP strings don't need to be escaped at all, however the
|
// serialized PHP strings don't need to be escaped at all, however the
|
||||||
// string size reported needs to be the number of bytes rather than chars.
|
// string size reported needs to be the number of bytes rather than chars.
|
||||||
writer.write("s:"+val.getBytes("UTF8").length+":\""+val+"\";");
|
int nBytes;
|
||||||
|
if (modifiedUTF8) {
|
||||||
|
nBytes = 0;
|
||||||
|
for (int i=0; i<val.length(); i++) {
|
||||||
|
char ch = val.charAt(i);
|
||||||
|
if (ch<='\u007f') {
|
||||||
|
nBytes += 1;
|
||||||
|
} else if (ch<='\u07ff') {
|
||||||
|
nBytes += 2;
|
||||||
|
} else {
|
||||||
|
nBytes += 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
UnicodeUtil.UTF16toUTF8(val, 0, val.length(), utf8);
|
||||||
|
nBytes = utf8.length;
|
||||||
|
}
|
||||||
|
|
||||||
|
writer.write("s:");
|
||||||
|
writer.write(Integer.toString(nBytes));
|
||||||
|
writer.write(":\"");
|
||||||
|
writer.write(val);
|
||||||
|
writer.write("\";");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue