mirror of https://github.com/apache/lucene.git
Upgraded xerces-2.9.1-patched-XERCESJ-1257.jar (committed as part of LUCENE-1591) to xercesImpl-2.10.0.jar (which contains the fix for XERCESJ-1257) and also upgraded xml-apis-2.9.0.jar to xml-apis-2.10.0.jar.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1021234 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4098f424b3
commit
15134055f9
|
@ -2,6 +2,15 @@ Lucene Benchmark Contrib Change Log
|
||||||
|
|
||||||
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
|
The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
|
||||||
|
|
||||||
|
10/10/2010
|
||||||
|
The locally built patched version of the Xerces-J jar introduced
|
||||||
|
as part of LUCENE-1591 is no longer required, because Xerces
|
||||||
|
2.10.0, which contains a fix for XERCESJ-1257 (see
|
||||||
|
http://svn.apache.org/viewvc?view=revision&revision=554069),
|
||||||
|
was released earlier this year. Upgraded
|
||||||
|
xerces-2.9.1-patched-XERCESJ-1257.jar and xml-apis-2.9.0.jar
|
||||||
|
to xercesImpl-2.10.0.jar and xml-apis-2.10.0.jar. (Steven Rowe)
|
||||||
|
|
||||||
8/2/2010
|
8/2/2010
|
||||||
LUCENE-2582: You can now specify the default codec to use for
|
LUCENE-2582: You can now specify the default codec to use for
|
||||||
writing new segments by adding default.codec = Pulsing (for
|
writing new segments by adding default.codec = Pulsing (for
|
||||||
|
|
|
@ -20,50 +20,3 @@ After that, ant enwiki should process the data set and run a load
|
||||||
test. Ant targets get-enwiki, expand-enwiki, and extract-enwiki can
|
test. Ant targets get-enwiki, expand-enwiki, and extract-enwiki can
|
||||||
also be used to download, decompress, and extract (to individual files
|
also be used to download, decompress, and extract (to individual files
|
||||||
in work/enwiki) the dataset, respectively.
|
in work/enwiki) the dataset, respectively.
|
||||||
|
|
||||||
NOTE: This bug in Xerces:
|
|
||||||
|
|
||||||
https://issues.apache.org/jira/browse/XERCESJ-1257
|
|
||||||
|
|
||||||
which is still present as of 2.9.1, causes an exception like this when
|
|
||||||
processing Wikipedia's XML:
|
|
||||||
|
|
||||||
Caused by: org.apache.xerces.impl.io.MalformedByteSequenceException: Invalid byte 2 of 4-byte UTF-8 sequence.
|
|
||||||
at org.apache.xerces.util.ErrorHandlerWrapper.createSAXParseException(Unknown Source)
|
|
||||||
at org.apache.xerces.util.ErrorHandlerWrapper.fatalError(Unknown Source)
|
|
||||||
at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
|
|
||||||
at org.apache.xerces.impl.XMLErrorReporter.reportError(Unknown Source)
|
|
||||||
at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl$FragmentContentDispatcher.dispatch(Unknown Source)
|
|
||||||
at org.apache.xerces.impl.XMLDocumentFragmentScannerImpl.scanDocument(Unknown Source)
|
|
||||||
at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source)
|
|
||||||
at org.apache.xerces.parsers.XML11Configuration.parse(Unknown Source)
|
|
||||||
at org.apache.xerces.parsers.XMLParser.parse(Unknown Source)
|
|
||||||
at org.apache.xerces.parsers.AbstractSAXParser.parse(Unknown Source)
|
|
||||||
at org.apache.lucene.benchmark.byTask.feeds.EnwikiDocMaker$Parser.run(EnwikiDocMaker.java:77)
|
|
||||||
... 1 more
|
|
||||||
|
|
||||||
The original poster in the Xerces bug provided this patch:
|
|
||||||
|
|
||||||
--- UTF8Reader.java 2006-11-23 00:36:53.000000000 +0100
|
|
||||||
+++ /home/rainman/lucene/xerces-2_9_0/src/org/apache/xerces/impl/io/UTF8Reader.java 2008-04-04 00:40:58.000000000 +0200
|
|
||||||
@@ -534,6 +534,16 @@
|
|
||||||
invalidByte(4, 4, b2);
|
|
||||||
}
|
|
||||||
|
|
||||||
+ // check if output buffer is large enough to hold 2 surrogate chars
|
|
||||||
+ if( out + 1 >= offset + length ){
|
|
||||||
+ fBuffer[0] = (byte)b0;
|
|
||||||
+ fBuffer[1] = (byte)b1;
|
|
||||||
+ fBuffer[2] = (byte)b2;
|
|
||||||
+ fBuffer[3] = (byte)b3;
|
|
||||||
+ fOffset = 4;
|
|
||||||
+ return out - offset;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
// decode bytes into surrogate characters
|
|
||||||
int uuuuu = ((b0 << 2) & 0x001C) | ((b1 >> 4) & 0x0003);
|
|
||||||
if (uuuuu > 0x10) {
|
|
||||||
|
|
||||||
which I've applied to Xerces 2.9.1 sources, and committed under
|
|
||||||
lib/xerces-2.9.1-patched-XERCESJ-1257.jar. Once XERCESJ-1257 is fixed
|
|
||||||
we can upgrade to a standard Xerces release.
|
|
||||||
|
|
|
@ -1,2 +0,0 @@
|
||||||
AnyObjectId[bbb5aa7ad5bcea61c5c66ceb2ba340431cc7262d] was removed in git history.
|
|
||||||
Apache SVN contains full history.
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[9dcd8c38196b24e51f78d8e1b0a42d1ffef60acb] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -0,0 +1,2 @@
|
||||||
|
AnyObjectId[46733464fc746776c331ecc51061f3a05e662fd1] was removed in git history.
|
||||||
|
Apache SVN contains full history.
|
|
@ -1,2 +0,0 @@
|
||||||
AnyObjectId[d42c0ea6cfd17ed6b444b8337febbc0bdb55ed83] was removed in git history.
|
|
||||||
Apache SVN contains full history.
|
|
|
@ -227,7 +227,7 @@ content.source=org.apache.lucene.benchmark.byTask.feeds.SortableSingleDocSource
|
||||||
print ' mkdir %s' % LOG_DIR
|
print ' mkdir %s' % LOG_DIR
|
||||||
os.makedirs(LOG_DIR)
|
os.makedirs(LOG_DIR)
|
||||||
|
|
||||||
command = '%s -classpath ../../build/classes/java:../../build/classes/demo:../../build/contrib/highlighter/classes/java:lib/commons-digester-1.7.jar:lib/commons-collections-3.1.jar:lib/commons-compress-1.0.jar:lib/commons-logging-1.0.4.jar:lib/commons-beanutils-1.7.0.jar:lib/xerces-2.9.0.jar:lib/xml-apis-2.9.0.jar:../../build/contrib/benchmark/classes/java org.apache.lucene.benchmark.byTask.Benchmark %s > "%s" 2>&1' % (JAVA_COMMAND, algFile, fullLogFileName)
|
command = '%s -classpath ../../build/classes/java:../../build/classes/demo:../../build/contrib/highlighter/classes/java:lib/commons-digester-1.7.jar:lib/commons-collections-3.1.jar:lib/commons-compress-1.0.jar:lib/commons-logging-1.0.4.jar:lib/commons-beanutils-1.7.0.jar:lib/xerces-2.10.0.jar:lib/xml-apis-2.10.0.jar:../../build/contrib/benchmark/classes/java org.apache.lucene.benchmark.byTask.Benchmark %s > "%s" 2>&1' % (JAVA_COMMAND, algFile, fullLogFileName)
|
||||||
|
|
||||||
if DEBUG:
|
if DEBUG:
|
||||||
print 'command=%s' % command
|
print 'command=%s' % command
|
||||||
|
|
Loading…
Reference in New Issue