LUCENE-4588: EnwikiContentSource fixes: lost last wiki doc, thread leak in 'forever' mode.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1417788 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2012-12-06 09:54:22 +00:00
parent bfde8ec0e1
commit e7cc2239fd
3 changed files with 219 additions and 21 deletions

View File

@ -202,6 +202,9 @@ Bug Fixes
* LUCENE-4009: Improve TermsFilter.toString (Tim Costermans via Chris * LUCENE-4009: Improve TermsFilter.toString (Tim Costermans via Chris
Male, Mike McCandless) Male, Mike McCandless)
* LUCENE-4588: Benchmark's EnwikiContentSource was discarding last wiki
document and had leaking threads in 'forever' mode. (Doron Cohen)
Changes in Runtime Behavior Changes in Runtime Behavior
* LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE. * LUCENE-4586: Change default ResultMode of FacetRequest to PER_NODE_IN_TREE.

View File

@ -53,6 +53,7 @@ public class EnwikiContentSource extends ContentSource {
private class Parser extends DefaultHandler implements Runnable { private class Parser extends DefaultHandler implements Runnable {
private Thread t; private Thread t;
private boolean threadDone; private boolean threadDone;
private boolean stopped = false;
private String[] tuple; private String[] tuple;
private NoMoreDataException nmde; private NoMoreDataException nmde;
private StringBuilder contents = new StringBuilder(); private StringBuilder contents = new StringBuilder();
@ -70,31 +71,31 @@ public class EnwikiContentSource extends ContentSource {
} }
String[] result; String[] result;
synchronized(this){ synchronized(this){
while(tuple == null && nmde == null && !threadDone) { while(tuple == null && nmde == null && !threadDone && !stopped) {
try { try {
wait(); wait();
} catch (InterruptedException ie) { } catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie); throw new ThreadInterruptedException(ie);
} }
} }
if (tuple != null) {
result = tuple;
tuple = null;
notify();
return result;
}
if (nmde != null) { if (nmde != null) {
// Set to null so we will re-start thread in case // Set to null so we will re-start thread in case
// we are re-used: // we are re-used:
t = null; t = null;
throw nmde; throw nmde;
} }
if (t != null && threadDone) { // The thread has exited yet did not hit end of
// The thread has exited yet did not hit end of // data, so this means it hit an exception. We
// data, so this means it hit an exception. We // throw NoMorDataException here to force
// throw NoMorDataException here to force // benchmark to stop the current alg:
// benchmark to stop the current alg: throw new NoMoreDataException();
throw new NoMoreDataException();
}
result = tuple;
tuple = null;
notify();
} }
return result;
} }
String time(String original) { String time(String original) {
@ -132,7 +133,7 @@ public class EnwikiContentSource extends ContentSource {
tmpTuple[BODY] = body.replaceAll("[\t\n]", " "); tmpTuple[BODY] = body.replaceAll("[\t\n]", " ");
tmpTuple[ID] = id; tmpTuple[ID] = id;
synchronized(this) { synchronized(this) {
while (tuple != null) { while (tuple != null && !stopped) {
try { try {
wait(); wait();
} catch (InterruptedException ie) { } catch (InterruptedException ie) {
@ -175,7 +176,7 @@ public class EnwikiContentSource extends ContentSource {
XMLReader reader = XMLReaderFactory.createXMLReader(); XMLReader reader = XMLReaderFactory.createXMLReader();
reader.setContentHandler(this); reader.setContentHandler(this);
reader.setErrorHandler(this); reader.setErrorHandler(this);
while(true){ while(!stopped){
final InputStream localFileIS = is; final InputStream localFileIS = is;
try { try {
// To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader. // To work around a bug in XERCES (XERCESJ-1257), we assume the XML is always UTF8, so we simply provide reader.
@ -186,8 +187,7 @@ public class EnwikiContentSource extends ContentSource {
} catch (IOException ioe) { } catch (IOException ioe) {
synchronized(EnwikiContentSource.this) { synchronized(EnwikiContentSource.this) {
if (localFileIS != is) { if (localFileIS != is) {
// fileIS was closed on us, so, just fall // fileIS was closed on us, so, just fall through
// through
} else } else
// Exception is real // Exception is real
throw ioe; throw ioe;
@ -200,7 +200,7 @@ public class EnwikiContentSource extends ContentSource {
return; return;
} else if (localFileIS == is) { } else if (localFileIS == is) {
// If file is not already re-opened then re-open it now // If file is not already re-opened then re-open it now
is = StreamUtils.inputStream(file); is = openInputStream();
} }
} }
} }
@ -238,6 +238,17 @@ public class EnwikiContentSource extends ContentSource {
// this element should be discarded. // this element should be discarded.
} }
} }
private void stop() {
synchronized (this) {
stopped = true;
if (tuple != null) {
tuple = null;
notify();
}
}
}
} }
private static final Map<String,Integer> ELEMENTS = new HashMap<String,Integer>(); private static final Map<String,Integer> ELEMENTS = new HashMap<String,Integer>();
@ -284,6 +295,7 @@ public class EnwikiContentSource extends ContentSource {
is.close(); is.close();
is = null; is = null;
} }
parser.stop();
} }
} }
@ -301,7 +313,12 @@ public class EnwikiContentSource extends ContentSource {
@Override @Override
public void resetInputs() throws IOException { public void resetInputs() throws IOException {
super.resetInputs(); super.resetInputs();
is = StreamUtils.inputStream(file); is = openInputStream();
}
/** Open the input stream. */
protected InputStream openInputStream() throws IOException {
return StreamUtils.inputStream(file);
} }
@Override @Override
@ -309,10 +326,9 @@ public class EnwikiContentSource extends ContentSource {
super.setConfig(config); super.setConfig(config);
keepImages = config.get("keep.image.only.docs", true); keepImages = config.get("keep.image.only.docs", true);
String fileName = config.get("docs.file", null); String fileName = config.get("docs.file", null);
if (fileName == null) { if (fileName != null) {
throw new IllegalArgumentException("docs.file must be set"); file = new File(fileName).getAbsoluteFile();
} }
file = new File(fileName).getAbsoluteFile();
} }
} }

View File

@ -0,0 +1,179 @@
package org.apache.lucene.benchmark.byTask.feeds;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.Properties;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Ignore;
import org.junit.Test;
public class EnwikiContentSourceTest extends LuceneTestCase {
/** An EnwikiContentSource which works on a String and not files. */
private static class StringableEnwikiSource extends EnwikiContentSource {
private final String docs;
public StringableEnwikiSource(String docs) {
this.docs = docs;
}
@SuppressWarnings("deprecation") // fine for the characters used in this test
@Override
protected InputStream openInputStream() throws IOException {
return new java.io.StringBufferInputStream(docs);
}
}
private void assertDocData(DocData dd, String expName, String expTitle, String expBody, String expDate)
throws ParseException {
assertNotNull(dd);
assertEquals(expName, dd.getName());
assertEquals(expTitle, dd.getTitle());
assertEquals(expBody, dd.getBody());
assertEquals(expDate, dd.getDate());
}
private void assertNoMoreDataException(EnwikiContentSource stdm) throws Exception {
try {
stdm.getNextDocData(null);
fail("Expecting NoMoreDataException");
} catch (NoMoreDataException e) {
// expected
}
}
private final String PAGE1 =
" <page>\r\n" +
" <title>Title1</title>\r\n" +
" <ns>0</ns>\r\n" +
" <id>1</id>\r\n" +
" <revision>\r\n" +
" <id>11</id>\r\n" +
" <parentid>111</parentid>\r\n" +
" <timestamp>2011-09-14T11:35:09Z</timestamp>\r\n" +
" <contributor>\r\n" +
" <username>Mister1111</username>\r\n" +
" <id>1111</id>\r\n" +
" </contributor>\r\n" +
" <minor />\r\n" +
" <comment>/* Never mind */</comment>\r\n" +
" <text>Some text 1 here</text>\r\n" +
" </revision>\r\n" +
" </page>\r\n";
private final String PAGE2 =
" <page>\r\n" +
" <title>Title2</title>\r\n" +
" <ns>0</ns>\r\n" +
" <id>2</id>\r\n" +
" <revision>\r\n" +
" <id>22</id>\r\n" +
" <parentid>222</parentid>\r\n" +
" <timestamp>2022-09-14T22:35:09Z</timestamp>\r\n" +
" <contributor>\r\n" +
" <username>Mister2222</username>\r\n" +
" <id>2222</id>\r\n" +
" </contributor>\r\n" +
" <minor />\r\n" +
" <comment>/* Never mind */</comment>\r\n" +
" <text>Some text 2 here</text>\r\n" +
" </revision>\r\n" +
" </page>\r\n";
@Test
public void testOneDocument() throws Exception {
String docs =
"<mediawiki>\r\n" +
PAGE1 +
"</mediawiki>";
EnwikiContentSource source = createContentSource(docs, false);
DocData dd = source.getNextDocData(new DocData());
assertDocData(dd, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
assertNoMoreDataException(source);
}
private EnwikiContentSource createContentSource(String docs, boolean forever) throws IOException {
Properties props = new Properties();
props.setProperty("print.props", "false");
props.setProperty("content.source.forever", Boolean.toString(forever));
Config config = new Config(props);
EnwikiContentSource source = new StringableEnwikiSource(docs);
source.setConfig(config);
// doc-maker just for initiating content source inputs
DocMaker docMaker = new DocMaker();
docMaker.setConfig(config, source);
docMaker.resetInputs();
return source;
}
@Test
public void testTwoDocuments() throws Exception {
String docs =
"<mediawiki>\r\n" +
PAGE1 +
PAGE2 +
"</mediawiki>";
EnwikiContentSource source = createContentSource(docs, false);
DocData dd1 = source.getNextDocData(new DocData());
assertDocData(dd1, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
DocData dd2 = source.getNextDocData(new DocData());
assertDocData(dd2, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");
assertNoMoreDataException(source);
}
@Test
public void testForever() throws Exception {
String docs =
"<mediawiki>\r\n" +
PAGE1 +
PAGE2 +
"</mediawiki>";
EnwikiContentSource source = createContentSource(docs, true);
// same documents several times
for (int i=0; i<3; i++) {
DocData dd1 = source.getNextDocData(new DocData());
assertDocData(dd1, "1", "Title1", "Some text 1 here", "14-SEP-2011 11:35:09.000");
DocData dd2 = source.getNextDocData(new DocData());
assertDocData(dd2, "2", "Title2", "Some text 2 here", "14-SEP-2022 22:35:09.000");
// Don't test that NoMoreDataException is thrown, since the forever flag is turned on.
}
source.close();
}
}