diff --git a/lucene/contrib/benchmark/CHANGES.txt b/lucene/contrib/benchmark/CHANGES.txt index 9cc34224110..3ba25a272c0 100644 --- a/lucene/contrib/benchmark/CHANGES.txt +++ b/lucene/contrib/benchmark/CHANGES.txt @@ -2,6 +2,12 @@ Lucene Benchmark Contrib Change Log The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways. +4/27/2010: WriteLineDocTask now supports multi-threading. Also, + StringBufferReader was renamed to StringBuilderReader and works on + StringBuilder now. In addition, LongToEnglishContentSource starts from 0 + (instead of Long.MIN_VALUE+10) and wraps around to Long.MIN_VALUE (if you ever hit + Long.MAX_VALUE). (Shai Erera) + 4/07/2010 LUCENE-2377: Enable the use of NoMergePolicy and NoMergeScheduler by CreateIndexTask. (Shai Erera) diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java index 2e2626d90c9..f59d931edb0 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/AbstractQueryMaker.java @@ -17,7 +17,6 @@ package org.apache.lucene.benchmark.byTask.feeds; import org.apache.lucene.search.Query; import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.benchmark.byTask.utils.Format; /** * Abstract base query maker. @@ -45,7 +44,7 @@ public abstract class AbstractQueryMaker implements QueryMaker { StringBuffer sb = new StringBuffer(); if (queries != null) { for (int i = 0; i < queries.length; i++) { - sb.append(i+". "+ Format.simpleName(queries[i].getClass())+" - "+queries[i].toString()); + sb.append(i+". 
"+ queries[i].getClass().getSimpleName()+" - "+queries[i].toString()); sb.append(newline); } } diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java index 8d2a193b623..2fd72170392 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DirContentSource.java @@ -40,8 +40,6 @@ import java.util.Stack; * denotes a relative path (default=work). *
  • docs.dir - specifies the directory the Dir collection. Can be set * to a relative path if "work.dir" is also specified (default=dir-out). - *
  • html.parser - specifies the {@link HTMLParser} class to use for - * parsing the TREC documents content (default=DemoHTMLParser). * */ public class DirContentSource extends ContentSource { diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java index b26c499aa39..191e1ac34f7 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java @@ -333,7 +333,7 @@ public class DocMaker { String col = " "; StringBuffer sb = new StringBuffer(); String newline = System.getProperty("line.separator"); - sb.append("------------> ").append(Format.simpleName(getClass())).append(" statistics (").append(printNum).append("): ").append(newline); + sb.append("------------> ").append(getClass().getSimpleName()).append(" statistics (").append(printNum).append("): ").append(newline); int nut = source.getTotalDocsCount(); if (nut > lastPrintedNumUniqueTexts) { print = true; diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java index c53ff8eb3bc..4d20e91b3fd 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/LongToEnglishContentSource.java @@ -1,34 +1,55 @@ package org.apache.lucene.benchmark.byTask.feeds; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import org.apache.lucene.util.English; import java.io.IOException; import java.util.Date; - /** - * - * - **/ + * Creates documents whose content is a long number, starting from 0 and + * wrapping around to {@link Long#MIN_VALUE} after {@link Long#MAX_VALUE}. + */ public class LongToEnglishContentSource extends ContentSource{ - private long counter = Long.MIN_VALUE + 10; + private long counter = 0; @Override public void close() throws IOException { - } - //TODO: reduce/clean up synchonization + @Override public synchronized DocData getNextDocData(DocData docData) throws NoMoreDataException, IOException { docData.clear(); - docData.setBody(English.longToEnglish(counter)); - docData.setName("doc_" + String.valueOf(counter)); - docData.setTitle("title_" + String.valueOf(counter)); + // store the current counter to avoid synchronization later on + long curCounter; + synchronized (this) { + curCounter = counter; + if (counter == Long.MAX_VALUE){ + counter = Long.MIN_VALUE;//loop around + } else { + ++counter; + } + } + docData.setBody(English.longToEnglish(curCounter)); + docData.setName("doc_" + String.valueOf(curCounter)); + docData.setTitle("title_" + String.valueOf(curCounter)); docData.setDate(new Date()); - if (counter == Long.MAX_VALUE){ - counter = Long.MIN_VALUE + 10;//loop around - } - counter++; return docData; } @@ -36,4 +57,5 @@ public class LongToEnglishContentSource extends ContentSource{ public void resetInputs() throws 
IOException { counter = Long.MIN_VALUE + 10; } + } diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java index dba774b3e8d..1101e661c91 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/TrecContentSource.java @@ -32,7 +32,7 @@ import java.util.Locale; import java.util.zip.GZIPInputStream; import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.benchmark.byTask.utils.StringBufferReader; +import org.apache.lucene.benchmark.byTask.utils.StringBuilderReader; import org.apache.lucene.util.ThreadInterruptedException; /** @@ -53,7 +53,6 @@ import org.apache.lucene.util.ThreadInterruptedException; * */ public class TrecContentSource extends ContentSource { - // TODO (3.0): change StringBuffer to StringBuilder private static final class DateFormatInfo { DateFormat[] dfs; @@ -79,8 +78,8 @@ public class TrecContentSource extends ContentSource { }; private ThreadLocal dateFormats = new ThreadLocal(); - private ThreadLocal trecDocReader = new ThreadLocal(); - private ThreadLocal trecDocBuffer = new ThreadLocal(); + private ThreadLocal trecDocReader = new ThreadLocal(); + private ThreadLocal trecDocBuffer = new ThreadLocal(); private File dataDir = null; private ArrayList inputFiles = new ArrayList(); private int nextFile = 0; @@ -110,19 +109,19 @@ public class TrecContentSource extends ContentSource { return dfi; } - private StringBuffer getDocBuffer() { - StringBuffer sb = trecDocBuffer.get(); + private StringBuilder getDocBuffer() { + StringBuilder sb = trecDocBuffer.get(); if (sb == null) { - sb = new StringBuffer(); + sb = new StringBuilder(); trecDocBuffer.set(sb); } return sb; } - private Reader getTrecDocReader(StringBuffer docBuffer) { - 
StringBufferReader r = trecDocReader.get(); + private Reader getTrecDocReader(StringBuilder docBuffer) { + StringBuilderReader r = trecDocReader.get(); if (r == null) { - r = new StringBufferReader(docBuffer); + r = new StringBuilderReader(docBuffer); trecDocReader.set(r); } else { r.set(docBuffer); @@ -131,7 +130,7 @@ public class TrecContentSource extends ContentSource { } // read until finding a line that starts with the specified prefix, or a terminating tag has been found. - private void read(StringBuffer buf, String prefix, boolean collectMatchLine, + private void read(StringBuilder buf, String prefix, boolean collectMatchLine, boolean collectAll, String terminatingTag) throws IOException, NoMoreDataException { String sep = ""; @@ -248,7 +247,7 @@ public class TrecContentSource extends ContentSource { openNextFile(); } - StringBuffer docBuf = getDocBuffer(); + StringBuilder docBuf = getDocBuffer(); // 1. skip until doc start docBuf.setLength(0); diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java index 024800c4069..6424673fc98 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/PerfTask.java @@ -23,7 +23,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.stats.Points; import org.apache.lucene.benchmark.byTask.stats.TaskStats; import org.apache.lucene.benchmark.byTask.utils.Config; -import org.apache.lucene.benchmark.byTask.utils.Format; /** * An abstract task to be tested for performance.
    @@ -67,7 +66,7 @@ public abstract class PerfTask implements Cloneable { /** Should not be used externally */ private PerfTask() { - name = Format.simpleName(getClass()); + name = getClass().getSimpleName(); if (name.endsWith("Task")) { name = name.substring(0, name.length() - 4); } @@ -99,13 +98,7 @@ public abstract class PerfTask implements Cloneable { this.maxDepthLogStart = config.get("task.max.depth.log",0); String logStepAtt = "log.step"; - // TODO (1.5): call getClass().getSimpleName() instead. - String taskName = getClass().getName(); - int idx = taskName.lastIndexOf('.'); - // To support test internal classes. when we move to getSimpleName, this can be removed. - int idx2 = taskName.indexOf('$', idx); - if (idx2 != -1) idx = idx2; - String taskLogStepAtt = "log.step." + taskName.substring(idx + 1, taskName.length() - 4 /* w/o the 'Task' part */); + String taskLogStepAtt = "log.step." + name; if (config.get(taskLogStepAtt, null) != null) { logStepAtt = taskLogStepAtt; } diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java index 84798f5796a..f815153a493 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java @@ -22,6 +22,7 @@ import java.io.BufferedWriter; import java.io.FileOutputStream; import java.io.OutputStream; import java.io.OutputStreamWriter; +import java.io.PrintWriter; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -56,8 +57,9 @@ public class WriteLineDocTask extends PerfTask { private static final Matcher NORMALIZER = Pattern.compile("[\t\r\n]+").matcher(""); private int docSize = 0; - private BufferedWriter lineFileOut = null; + private PrintWriter lineFileOut = null; private DocMaker docMaker; + private ThreadLocal 
threadBuffer = new ThreadLocal(); public WriteLineDocTask(PerfRunData runData) throws Exception { super(runData); @@ -85,7 +87,7 @@ public class WriteLineDocTask extends PerfTask { out = new BufferedOutputStream(out, 1 << 16); out = new CompressorStreamFactory().createCompressorOutputStream("bzip2", out); } - lineFileOut = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16); + lineFileOut = new PrintWriter(new BufferedWriter(new OutputStreamWriter(out, "UTF-8"), 1 << 16)); docMaker = runData.getDocMaker(); } @@ -109,12 +111,15 @@ public class WriteLineDocTask extends PerfTask { f = doc.getField(DocMaker.DATE_FIELD); String date = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : ""; - lineFileOut.write(title, 0, title.length()); - lineFileOut.write(SEP); - lineFileOut.write(date, 0, date.length()); - lineFileOut.write(SEP); - lineFileOut.write(body, 0, body.length()); - lineFileOut.newLine(); + StringBuilder sb = threadBuffer.get(); + if (sb == null) { + sb = new StringBuilder(); + threadBuffer.set(sb); + } + sb.setLength(0); + sb.append(title).append(SEP).append(date).append(SEP).append(body); + // lineFileOut is a PrintWriter, which synchronizes internally in println. 
+ lineFileOut.println(sb.toString()); } return 1; } diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java index 811906515dc..a571fb5b314 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Format.java @@ -107,19 +107,4 @@ public class Format { return res.substring(res.length() - col.length()); } - /** - * Extract simple class name - * @param cls class whose simple name is required - * @return simple class name - */ - public static String simpleName (Class cls) { - String c = cls.getName(); - String p = cls.getPackage().getName(); - int k = c.lastIndexOf(p+"."); - if (k<0) { - return c; - } - return c.substring(k+1+p.length()); - } - } diff --git a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBufferReader.java b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java similarity index 85% rename from lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBufferReader.java rename to lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java index d8f56fb3302..c6e9510e01d 100644 --- a/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBufferReader.java +++ b/lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/StringBuilderReader.java @@ -21,22 +21,22 @@ import java.io.IOException; import java.io.Reader; /** - * Implements a {@link Reader} over a {@link StringBuffer} instance. Although + * Implements a {@link Reader} over a {@link StringBuilder} instance. 
Although * one can use {@link java.io.StringReader} by passing it - * {@link StringBuffer#toString()}, it is better to use this class, as it - * doesn't mark the passed-in {@link StringBuffer} as shared (which will cause + * {@link StringBuilder#toString()}, it is better to use this class, as it + * doesn't mark the passed-in {@link StringBuilder} as shared (which will cause * inner char[] allocations at the next append() attempt).
    * Notes: *
      - *
    • This implementation assumes the underlying {@link StringBuffer} is not + *
    • This implementation assumes the underlying {@link StringBuilder} is not * changed during the use of this {@link Reader} implementation. *
    • This implementation is thread-safe. *
    • The implementation looks very much like {@link java.io.StringReader} (for * the right reasons). *
    • If one wants to reuse that instance, then the following needs to be done: *
      - * StringBuffer sb = new StringBuffer("some text");
      - * Reader reader = new StringBufferReader(sb);
      + * StringBuilder sb = new StringBuilder("some text");
      + * Reader reader = new StringBuilderReader(sb);
        * ... read from reader - don't close it ! ...
        * sb.setLength(0);
        * sb.append("some new text");
      @@ -45,23 +45,21 @@ import java.io.Reader;
        * 
      *
    */ -public class StringBufferReader extends Reader { +public class StringBuilderReader extends Reader { - // TODO (3.0): change to StringBuffer (including the name of the class) - - // The StringBuffer to read from. - private StringBuffer sb; + // The StringBuilder to read from. + private StringBuilder sb; // The length of 'sb'. private int length; - // The next position to read from the StringBuffer. + // The next position to read from the StringBuilder. private int next = 0; // The mark position. The default value 0 means the start of the text. private int mark = 0; - public StringBufferReader(StringBuffer sb) { + public StringBuilderReader(StringBuilder sb) { set(sb); } @@ -85,7 +83,7 @@ public class StringBufferReader extends Reader { * * @param readAheadLimit Limit on the number of characters that may be read * while still preserving the mark. Because the stream's input comes - * from a StringBuffer, there is no actual limit, so this argument + * from a StringBuilder, there is no actual limit, so this argument * must not be negative, but is otherwise ignored. 
* @exception IllegalArgumentException If readAheadLimit is < 0 * @exception IOException If an I/O error occurs @@ -156,7 +154,7 @@ public class StringBufferReader extends Reader { } } - public void set(StringBuffer sb) { + public void set(StringBuilder sb) { synchronized (lock) { this.sb = sb; length = sb.length(); diff --git a/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java b/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java index ee93b968f84..f2121164bf2 100644 --- a/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java +++ b/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java @@ -22,7 +22,9 @@ import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import java.io.InputStreamReader; +import java.util.HashSet; import java.util.Properties; +import java.util.Set; import org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.lucene.benchmark.BenchmarkTestCase; @@ -97,6 +99,21 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { } } + // class has to be public so that Class.forName.newInstance() will work + public static final class ThreadingDocMaker extends DocMaker { + + @Override + public Document makeDocument() throws Exception { + Document doc = new Document(); + String name = Thread.currentThread().getName(); + doc.add(new Field(BODY_FIELD, "body_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(TITLE_FIELD, "title_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new Field(DATE_FIELD, "date_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + return doc; + } + + } + private static final CompressorStreamFactory csFactory = new CompressorStreamFactory(); private PerfRunData createPerfRunData(File file, boolean setBZCompress, @@ -225,5 +242,49 @@ public class 
WriteLineDocTaskTest extends BenchmarkTestCase { br.close(); } } - + + public void testMultiThreaded() throws Exception { + File file = new File(getWorkDir(), "one-line"); + PerfRunData runData = createPerfRunData(file, false, null, ThreadingDocMaker.class.getName()); + final WriteLineDocTask wldt = new WriteLineDocTask(runData); + Thread[] threads = new Thread[10]; + for (int i = 0; i < threads.length; i++) { + threads[i] = new Thread("t" + i) { + @Override + public void run() { + try { + wldt.doLogic(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + }; + } + + for (Thread t : threads) t.start(); + for (Thread t : threads) t.join(); + + wldt.close(); + + Set ids = new HashSet(); + BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8")); + try { + for (int i = 0; i < threads.length; i++) { + String line = br.readLine(); + String[] parts = line.split(Character.toString(WriteLineDocTask.SEP)); + assertEquals(3, parts.length); + // check that all thread names written are the same in the same line + String tname = parts[0].substring(parts[0].indexOf('_')); + ids.add(tname); + assertEquals(tname, parts[1].substring(parts[1].indexOf('_'))); + assertEquals(tname, parts[2].substring(parts[2].indexOf('_'))); + } + // only threads.length lines should exist + assertNull(br.readLine()); + assertEquals(threads.length, ids.size()); + } finally { + br.close(); + } + } + }