From 9ce62b7c078f04b731ad37f4b9e6a1013518c87c Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Wed, 9 Mar 2011 19:19:29 +0000 Subject: [PATCH] SOLR-2381: update jetty to a new patched version (fixes boundary issue); avoid slow/buggy jetty unicode conversion when possible; adds unicode test git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1079949 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 2 +- .../lib/jetty-6.1.26-patched-JETTY-1340.jar | 2 +- .../jetty-util-6.1.26-patched-JETTY-1340.jar | 2 +- .../solr/client/solrj/SolrExampleTests.java | 88 +++++++++++++++++++ .../solr/servlet/SolrDispatchFilter.java | 16 +++- 5 files changed, 105 insertions(+), 5 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index dee38f113ab..0f1ebfc1aad 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -834,7 +834,7 @@ Other Changes * SOLR-2381: Include a patched version of Jetty (6.1.26 + JETTY-1340) to fix problematic UTF-8 handling for supplementary characters. - (uschindler, yonik, rmuir) + (Bernd Fehling, uschindler, yonik, rmuir) * SOLR-2391: The preferred Content-Type for XML was changed to application/xml. XMLResponseWriter now only delivers using this diff --git a/solr/example/lib/jetty-6.1.26-patched-JETTY-1340.jar b/solr/example/lib/jetty-6.1.26-patched-JETTY-1340.jar index 3368a8ed21c..16de87a3020 100644 --- a/solr/example/lib/jetty-6.1.26-patched-JETTY-1340.jar +++ b/solr/example/lib/jetty-6.1.26-patched-JETTY-1340.jar @@ -1,2 +1,2 @@ -AnyObjectId[33aa586bcc18fc1ff0276b56e595281fb90e2bb5] was removed in git history. +AnyObjectId[6be492c92fd7b36ed008e8461c8657cc6bd27c2a] was removed in git history. Apache SVN contains full history. \ No newline at end of file diff --git a/solr/example/lib/jetty-util-6.1.26-patched-JETTY-1340.jar b/solr/example/lib/jetty-util-6.1.26-patched-JETTY-1340.jar index 67f2e8587c4..0304e37bbf7 100644 --- a/solr/example/lib/jetty-util-6.1.26-patched-JETTY-1340.jar +++ b/solr/example/lib/jetty-util-6.1.26-patched-JETTY-1340.jar @@ -1,2 +1,2 @@ -AnyObjectId[286a135efb973e693b20e437dc01eb8658e72ff2] was removed in git history. +AnyObjectId[1a9ace88dd00cf94e17d231805cc8bdc60886376] was removed in git history. Apache SVN contains full history. \ No newline at end of file diff --git a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java index 912856009ea..f1aea4dc418 100644 --- a/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java +++ b/solr/src/test/org/apache/solr/client/solrj/SolrExampleTests.java @@ -27,6 +27,10 @@ import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import junit.framework.Assert; +import org.apache.lucene.util._TestUtil; +import org.apache.solr.client.solrj.impl.BinaryResponseParser; +import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer; +import org.apache.solr.client.solrj.impl.XMLResponseParser; import org.apache.solr.client.solrj.request.DirectXmlRequest; import org.apache.solr.client.solrj.request.LukeRequest; import org.apache.solr.client.solrj.request.SolrPing; @@ -215,6 +219,90 @@ abstract public class SolrExampleTests extends SolrJettyTestBase } + private String randomTestString(int maxLength) { + // we can't just use _TestUtil.randomUnicodeString() or we might get 0xfffe etc + // (considered invalid by XML) + + int size = random.nextInt(maxLength); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < size; i++) { + switch(random.nextInt(4)) { + case 0: /* single byte */ + sb.append('a'); + break; + case 1: /* two bytes */ + sb.append('\u0645'); + break; + case 2: /* three bytes */ + sb.append('\u092a'); + break; + case 3: /* four bytes */ + sb.appendCodePoint(0x29B05); + } + } + return sb.toString(); + } + + public void testUnicode() throws Exception { + int numIterations = 100 * RANDOM_MULTIPLIER; + + SolrServer server = getSolrServer(); + + // save the old parser, so we can set it back. + ResponseParser oldParser = null; + if (server instanceof CommonsHttpSolrServer) { + CommonsHttpSolrServer cserver = (CommonsHttpSolrServer) server; + oldParser = cserver.getParser(); + } + + try { + for (int iteration = 0; iteration < numIterations; iteration++) { + // choose format + if (server instanceof CommonsHttpSolrServer) { + if (random.nextBoolean()) { + ((CommonsHttpSolrServer) server).setParser(new BinaryResponseParser()); + } else { + ((CommonsHttpSolrServer) server).setParser(new XMLResponseParser()); + } + } + + int numDocs = _TestUtil.nextInt(random, 1, 100); + + // Empty the database... + server.deleteByQuery("*:*");// delete everything! + + List docs = new ArrayList(); + for (int i = 0; i < numDocs; i++) { + // Now add something... + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("id", "" + i); + doc.addField("unicode_s", randomTestString(30)); + docs.add(doc); + } + + server.add(docs); + server.commit(); + + SolrQuery query = new SolrQuery(); + query.setQuery("*:*"); + query.setRows(numDocs); + + QueryResponse rsp = server.query( query ); + + for (int i = 0; i < numDocs; i++) { + String expected = (String) docs.get(i).getFieldValue("unicode_s"); + String actual = (String) rsp.getResults().get(i).getFieldValue("unicode_s"); + assertEquals(expected, actual); + } + } + } finally { + if (oldParser != null) { + // set the old parser back + ((CommonsHttpSolrServer)server).setParser(oldParser); + } + } + } + /** * query the example */ diff --git a/solr/src/webapp/src/org/apache/solr/servlet/SolrDispatchFilter.java b/solr/src/webapp/src/org/apache/solr/servlet/SolrDispatchFilter.java index 6ac606d68f0..bf002811937 100644 --- a/solr/src/webapp/src/org/apache/solr/servlet/SolrDispatchFilter.java +++ b/solr/src/webapp/src/org/apache/solr/servlet/SolrDispatchFilter.java @@ -18,9 +18,12 @@ package org.apache.solr.servlet; import java.io.IOException; +import java.io.Writer; import java.io.PrintWriter; import java.io.StringWriter; +import java.io.OutputStreamWriter; import java.io.ByteArrayInputStream; +import java.nio.charset.Charset; import java.util.Map; import java.util.WeakHashMap; import org.slf4j.Logger; @@ -40,6 +43,8 @@ import org.apache.solr.common.SolrException; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.util.FastWriter; +import org.apache.solr.common.util.ContentStreamBase; import org.apache.solr.core.*; import org.apache.solr.request.*; import org.apache.solr.response.BinaryQueryResponseWriter; @@ -63,6 +68,8 @@ public class SolrDispatchFilter implements Filter protected String solrConfigFilename = null; protected final Map parsers = new WeakHashMap(); protected final SolrRequestParsers adminRequestParser; + + private static final Charset UTF8 = Charset.forName("UTF-8"); public SolrDispatchFilter() { try { @@ -319,14 +326,19 @@ public class SolrDispatchFilter implements Filter final String ct = responseWriter.getContentType(solrReq, solrRsp); // don't call setContentType on null if (null != ct) response.setContentType(ct); + if (Method.HEAD != reqMethod) { if (responseWriter instanceof BinaryQueryResponseWriter) { BinaryQueryResponseWriter binWriter = (BinaryQueryResponseWriter) responseWriter; binWriter.write(response.getOutputStream(), solrReq, solrRsp); } else { - PrintWriter out = response.getWriter(); + String charset = ContentStreamBase.getCharsetFromContentType(ct); + Writer out = (charset == null || charset.equalsIgnoreCase("UTF-8")) + ? new OutputStreamWriter(response.getOutputStream(), UTF8) + : new OutputStreamWriter(response.getOutputStream(), charset); + out = new FastWriter(out); responseWriter.write(out, solrReq, solrRsp); - + out.flush(); } } //else http HEAD request, nothing to write out, waited this long just to get ContentType