mirror of https://github.com/apache/lucene.git
LUCENE-4588: EnwikiContentSource fixes: lost last wiki doc, thread leak in 'forever' mode.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1417788 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bfde8ec0e1
commit
e7cc2239fd
|
@ -202,6 +202,9 @@ Bug Fixes
|
||||||
* LUCENE-4009: Improve TermsFilter.toString (Tim Costermans via Chris
|
* LUCENE-4009: Improve TermsFilter.toString (Tim Costermans via Chris
|
||||||
Male, Mike McCandless)
|
Male, Mike McCandless)
|
||||||
|
|
||||||
|
* LUCENE-4588: Benchmark's EnwikiContentSource was discarding last wiki
|
||||||
|
document and had leaking threads in 'forever' mode. (Doron Cohen)
|
||||||
|
|
||||||
Changes in Runtime Behavior
|
Changes in Runtime Behavior
|
||||||
|
|
||||||
* LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE.
|
* LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE.
|
||||||
|
|
|
@ -53,6 +53,7 @@ public class EnwikiContentSource extends ContentSource {
|
||||||
private class Parser extends DefaultHandler implements Runnable {
|
private class Parser extends DefaultHandler implements Runnable {
|
||||||
private Thread t;
|
private Thread t;
|
||||||
private boolean threadDone;
|
private boolean threadDone;
|
||||||
|
private boolean stopped = false;
|
||||||
private String[] tuple;
|
private String[] tuple;
|
||||||
private NoMoreDataException nmde;
|
private NoMoreDataException nmde;
|
||||||
private StringBuilder contents = new StringBuilder();
|
private StringBuilder contents = new StringBuilder();
|
||||||
|
@ -70,31 +71,31 @@ public class EnwikiContentSource extends ContentSource {
|
||||||
}
|
}
|
||||||
String[] result;
|
String[] result;
|
||||||
synchronized(this){
|
synchronized(this){
|
||||||
while(tuple == null && nmde == null && !threadDone) {
|
while(tuple == null && nmde == null && !threadDone && !stopped) {
|
||||||
try {
|
try {
|
||||||
wait();
|
wait();
|
||||||
} catch (InterruptedException ie) {
|
} catch (InterruptedException ie) {
|
||||||
throw new ThreadInterruptedException(ie);
|
throw new ThreadInterruptedException(ie);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (tuple != null) {
|
||||||
|
result = tuple;
|
||||||
|
tuple = null;
|
||||||
|
notify();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
if (nmde != null) {
|
if (nmde != null) {
|
||||||
// Set to null so we will re-start thread in case
|
// Set to null so we will re-start thread in case
|
||||||
// we are re-used:
|
// we are re-used:
|
||||||
t = null;
|
t = null;
|
||||||
throw nmde;
|
throw nmde;
|
||||||
}
|
}
|
||||||
if (t != null && threadDone) {
|
// The thread has exited yet did not hit end of
|
||||||
// The thread has exited yet did not hit end of
|
// data, so this means it hit an exception. We
|
||||||
// data, so this means it hit an exception. We
|
// throw NoMoreDataException here to force
|
||||||
// throw NoMoreDataException here to force
|
// benchmark to stop the current alg:
|
||||||
// benchmark to stop the current alg:
|
throw new NoMoreDataException();
|
||||||
throw new NoMoreDataException();
|
|
||||||
}
|
|
||||||
result = tuple;
|
|
||||||
tuple = null;
|
|
||||||
notify();
|
|
||||||
}
|
}
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
String time(String original) {
|
String time(String original) {
|
||||||
|
@ -132,7 +133,7 @@ public class EnwikiContentSource extends ContentSource {
|
||||||
tmpTuple[BODY] = body.replaceAll("[\t\n]", " ");
|
tmpTuple[BODY] = body.replaceAll("[\t\n]", " ");
|
||||||
tmpTuple[ID] = id;
|
tmpTuple[ID] = id;
|
||||||
synchronized(this) {
|
synchronized(this) {
|
||||||
while (tuple != null) {
|
while (tuple != null && !stopped) {
|
||||||
try {
|
try {
|
||||||
wait();
|
wait();
|
||||||
} catch (InterruptedException ie) {
|
} catch (InterruptedException ie) {
|
||||||
|
@ -175,7 +176,7 @@ public class EnwikiContentSource extends ContentSource {
|
||||||
XMLReader reader = XMLReaderFactory.createXMLReader();
|
XMLReader reader = XMLReaderFactory.createXMLReader();
|
||||||
reader.setContentHandler(this);
|
reader.setContentHandler(this);
|
||||||
reader.setErrorHandler(this);
|
reader.setErrorHandler(this);
|
||||||
while(true){
|
while(!stopped){
|
||||||
final InputStream localFileIS = is;
|
final InputStream localFileIS = is;
|
||||||
try {
|
try {
|
||||||
// To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader.
|
// To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader.
|
||||||
|
@ -186,8 +187,7 @@ public class EnwikiContentSource extends ContentSource {
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
synchronized(EnwikiContentSource.this) {
|
synchronized(EnwikiContentSource.this) {
|
||||||
if (localFileIS != is) {
|
if (localFileIS != is) {
|
||||||
// fileIS was closed on us, so, just fall
|
// fileIS was closed on us, so, just fall through
|
||||||
// through
|
|
||||||
} else
|
} else
|
||||||
// Exception is real
|
// Exception is real
|
||||||
throw ioe;
|
throw ioe;
|
||||||
|
@ -200,7 +200,7 @@ public class EnwikiContentSource extends ContentSource {
|
||||||
return;
|
return;
|
||||||
} else if (localFileIS == is) {
|
} else if (localFileIS == is) {
|
||||||
// If file is not already re-opened then re-open it now
|
// If file is not already re-opened then re-open it now
|
||||||
is = StreamUtils.inputStream(file);
|
is = openInputStream();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -238,6 +238,17 @@ public class EnwikiContentSource extends ContentSource {
|
||||||
// this element should be discarded.
|
// this element should be discarded.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void stop() {
|
||||||
|
synchronized (this) {
|
||||||
|
stopped = true;
|
||||||
|
if (tuple != null) {
|
||||||
|
tuple = null;
|
||||||
|
notify();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Map<String,Integer> ELEMENTS = new HashMap<String,Integer>();
|
private static final Map<String,Integer> ELEMENTS = new HashMap<String,Integer>();
|
||||||
|
@ -284,6 +295,7 @@ public class EnwikiContentSource extends ContentSource {
|
||||||
is.close();
|
is.close();
|
||||||
is = null;
|
is = null;
|
||||||
}
|
}
|
||||||
|
parser.stop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -301,7 +313,12 @@ public class EnwikiContentSource extends ContentSource {
|
||||||
@Override
|
@Override
|
||||||
public void resetInputs() throws IOException {
|
public void resetInputs() throws IOException {
|
||||||
super.resetInputs();
|
super.resetInputs();
|
||||||
is = StreamUtils.inputStream(file);
|
is = openInputStream();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Open the input stream. */
|
||||||
|
protected InputStream openInputStream() throws IOException {
|
||||||
|
return StreamUtils.inputStream(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -309,10 +326,9 @@ public class EnwikiContentSource extends ContentSource {
|
||||||
super.setConfig(config);
|
super.setConfig(config);
|
||||||
keepImages = config.get("keep.image.only.docs", true);
|
keepImages = config.get("keep.image.only.docs", true);
|
||||||
String fileName = config.get("docs.file", null);
|
String fileName = config.get("docs.file", null);
|
||||||
if (fileName == null) {
|
if (fileName != null) {
|
||||||
throw new IllegalArgumentException("docs.file must be set");
|
file = new File(fileName).getAbsoluteFile();
|
||||||
}
|
}
|
||||||
file = new File(fileName).getAbsoluteFile();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,179 @@
|
||||||
|
package org.apache.lucene.benchmark.byTask.feeds;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.text.ParseException;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.junit.Ignore;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class EnwikiContentSourceTest extends LuceneTestCase {
|
||||||
|
|
||||||
|
/** An EnwikiContentSource which works on a String and not files. */
|
||||||
|
private static class StringableEnwikiSource extends EnwikiContentSource {
|
||||||
|
|
||||||
|
private final String docs;
|
||||||
|
|
||||||
|
public StringableEnwikiSource(String docs) {
|
||||||
|
this.docs = docs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("deprecation") // fine for the characters used in this test
|
||||||
|
@Override
|
||||||
|
protected InputStream openInputStream() throws IOException {
|
||||||
|
return new java.io.StringBufferInputStream(docs);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertDocData(DocData dd, String expName, String expTitle, String expBody, String expDate)
|
||||||
|
throws ParseException {
|
||||||
|
assertNotNull(dd);
|
||||||
|
assertEquals(expName, dd.getName());
|
||||||
|
assertEquals(expTitle, dd.getTitle());
|
||||||
|
assertEquals(expBody, dd.getBody());
|
||||||
|
assertEquals(expDate, dd.getDate());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertNoMoreDataException(EnwikiContentSource stdm) throws Exception {
|
||||||
|
try {
|
||||||
|
stdm.getNextDocData(null);
|
||||||
|
fail("Expecting NoMoreDataException");
|
||||||
|
} catch (NoMoreDataException e) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final String PAGE1 =
|
||||||
|
" <page>\r\n" +
|
||||||
|
" <title>Title1</title>\r\n" +
|
||||||
|
" <ns>0</ns>\r\n" +
|
||||||
|
" <id>1</id>\r\n" +
|
||||||
|
" <revision>\r\n" +
|
||||||
|
" <id>11</id>\r\n" +
|
||||||
|
" <parentid>111</parentid>\r\n" +
|
||||||
|
" <timestamp>2011-09-14T11:35:09Z</timestamp>\r\n" +
|
||||||
|
" <contributor>\r\n" +
|
||||||
|
" <username>Mister1111</username>\r\n" +
|
||||||
|
" <id>1111</id>\r\n" +
|
||||||
|
" </contributor>\r\n" +
|
||||||
|
" <minor />\r\n" +
|
||||||
|
" <comment>/* Never mind */</comment>\r\n" +
|
||||||
|
" <text>Some text 1 here</text>\r\n" +
|
||||||
|
" </revision>\r\n" +
|
||||||
|
" </page>\r\n";
|
||||||
|
|
||||||
|
private final String PAGE2 =
|
||||||
|
" <page>\r\n" +
|
||||||
|
" <title>Title2</title>\r\n" +
|
||||||
|
" <ns>0</ns>\r\n" +
|
||||||
|
" <id>2</id>\r\n" +
|
||||||
|
" <revision>\r\n" +
|
||||||
|
" <id>22</id>\r\n" +
|
||||||
|
" <parentid>222</parentid>\r\n" +
|
||||||
|
" <timestamp>2022-09-14T22:35:09Z</timestamp>\r\n" +
|
||||||
|
" <contributor>\r\n" +
|
||||||
|
" <username>Mister2222</username>\r\n" +
|
||||||
|
" <id>2222</id>\r\n" +
|
||||||
|
" </contributor>\r\n" +
|
||||||
|
" <minor />\r\n" +
|
||||||
|
" <comment>/* Never mind */</comment>\r\n" +
|
||||||
|
" <text>Some text 2 here</text>\r\n" +
|
||||||
|
" </revision>\r\n" +
|
||||||
|
" </page>\r\n";
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testOneDocument() throws Exception {
|
||||||
|
String docs =
|
||||||
|
"<mediawiki>\r\n" +
|
||||||
|
PAGE1 +
|
||||||
|
"</mediawiki>";
|
||||||
|
|
||||||
|
EnwikiContentSource source = createContentSource(docs, false);
|
||||||
|
|
||||||
|
DocData dd = source.getNextDocData(new DocData());
|
||||||
|
assertDocData(dd, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
|
||||||
|
|
||||||
|
assertNoMoreDataException(source);
|
||||||
|
}
|
||||||
|
|
||||||
|
private EnwikiContentSource createContentSource(String docs, boolean forever) throws IOException {
|
||||||
|
|
||||||
|
Properties props = new Properties();
|
||||||
|
props.setProperty("print.props", "false");
|
||||||
|
props.setProperty("content.source.forever", Boolean.toString(forever));
|
||||||
|
Config config = new Config(props);
|
||||||
|
|
||||||
|
EnwikiContentSource source = new StringableEnwikiSource(docs);
|
||||||
|
source.setConfig(config);
|
||||||
|
|
||||||
|
// doc-maker just for initiating content source inputs
|
||||||
|
DocMaker docMaker = new DocMaker();
|
||||||
|
docMaker.setConfig(config, source);
|
||||||
|
docMaker.resetInputs();
|
||||||
|
return source;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testTwoDocuments() throws Exception {
|
||||||
|
String docs =
|
||||||
|
"<mediawiki>\r\n" +
|
||||||
|
PAGE1 +
|
||||||
|
PAGE2 +
|
||||||
|
"</mediawiki>";
|
||||||
|
|
||||||
|
EnwikiContentSource source = createContentSource(docs, false);
|
||||||
|
|
||||||
|
DocData dd1 = source.getNextDocData(new DocData());
|
||||||
|
assertDocData(dd1, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
|
||||||
|
|
||||||
|
DocData dd2 = source.getNextDocData(new DocData());
|
||||||
|
assertDocData(dd2, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");
|
||||||
|
|
||||||
|
assertNoMoreDataException(source);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testForever() throws Exception {
|
||||||
|
String docs =
|
||||||
|
"<mediawiki>\r\n" +
|
||||||
|
PAGE1 +
|
||||||
|
PAGE2 +
|
||||||
|
"</mediawiki>";
|
||||||
|
|
||||||
|
EnwikiContentSource source = createContentSource(docs, true);
|
||||||
|
|
||||||
|
// same documents several times
|
||||||
|
for (int i=0; i<3; i++) {
|
||||||
|
DocData dd1 = source.getNextDocData(new DocData());
|
||||||
|
assertDocData(dd1, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
|
||||||
|
|
||||||
|
DocData dd2 = source.getNextDocData(new DocData());
|
||||||
|
assertDocData(dd2, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");
|
||||||
|
// Don't test that NoMoreDataException is thrown, since the forever flag is turned on.
|
||||||
|
}
|
||||||
|
|
||||||
|
source.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue