LUCENE-3937: Work around a XERCES-J bug in the benchmark module.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1307141 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Uwe Schindler 2012-03-29 22:04:15 +00:00
parent 69878cf594
commit 4d31bb3a7c
5 changed files with 15 additions and 39 deletions

View File

@ -294,6 +294,9 @@ Bug Fixes
could stop early if the Reader only partially fills the provided
buffer. (Mike McCandless)
* LUCENE-3937: Work around a XERCES-J bug (XERCESJ-1257) in the benchmark module.
(Uwe Schindler, Robert Muir, Mike McCandless)
Documentation
* LUCENE-3599: Javadocs for DistanceUtils.haversine() were incorrectly

View File

@ -1,36 +0,0 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<parent>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-parent</artifactId>
<version>@version@</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-xercesImpl</artifactId>
<name>Lucene Specific xercesImpl</name>
<version>@version@</version>
<description>Lucene Specific xercesImpl v2.9.1 patched with XERCESJ-1257</description>
<packaging>jar</packaging>
</project>

View File

@ -1,2 +0,0 @@
AnyObjectId[bbb5aa7ad5bcea61c5c66ceb2ba340431cc7262d] was removed in git history.
Apache SVN contains full history.

View File

@ -0,0 +1,2 @@
AnyObjectId[547f56300d93fe36587910739e095f03e287d47e] was removed in git history.
Apache SVN contains full history.

View File

@ -20,12 +20,17 @@ package org.apache.lucene.benchmark.byTask.feeds;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.StreamUtils;
import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.IOUtils;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
@ -172,7 +177,11 @@ public class EnwikiContentSource extends ContentSource {
while(true){
final InputStream localFileIS = is;
try {
reader.parse(new InputSource(localFileIS));
// To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF-8 and hand the parser a Reader that decodes it, instead of the raw byte stream.
CharsetDecoder decoder = IOUtils.CHARSET_UTF_8.newDecoder()
.onMalformedInput(CodingErrorAction.REPORT)
.onUnmappableCharacter(CodingErrorAction.REPORT);
reader.parse(new InputSource(new BufferedReader(new InputStreamReader(localFileIS, decoder))));
} catch (IOException ioe) {
synchronized(EnwikiContentSource.this) {
if (localFileIS != is) {