From e53aee7739be1c04bd1673a55b4956efb63c337f Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Tue, 21 Aug 2012 12:43:27 +0000 Subject: [PATCH] LUCENE-4317: Improve reuse of internal TokenStreams in oal.document.Field. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1375507 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 + .../org/apache/lucene/document/Field.java | 138 +++++++++--------- 2 files changed, 77 insertions(+), 66 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d898dde2c2f..fc3e37513c7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -88,6 +88,11 @@ Bug Fixes containing non-BMP Unicode characters. (Dawid Weiss, Robert Muir, Mike McCandless) +Optimizations + +* LUCENE-4317: Improve reuse of internal TokenStreams in oal.document.Field. + (Uwe Schindler, Chris Male, Robert Muir) + Build * LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for diff --git a/lucene/core/src/java/org/apache/lucene/document/Field.java b/lucene/core/src/java/org/apache/lucene/document/Field.java index 936365421ec..07ad5c37bd9 100644 --- a/lucene/core/src/java/org/apache/lucene/document/Field.java +++ b/lucene/core/src/java/org/apache/lucene/document/Field.java @@ -73,7 +73,7 @@ public class Field implements IndexableField { // customize how it's tokenized: protected TokenStream tokenStream; - protected transient NumericTokenStream numericTokenStream; + protected transient TokenStream internalTokenStream; protected float boost = 1.0f; @@ -283,9 +283,6 @@ public class Field implements IndexableField { if (!(fieldsData instanceof Byte)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Byte"); } - if (numericTokenStream != null) { - numericTokenStream.setIntValue(value); - } fieldsData = Byte.valueOf(value); } @@ -293,9 +290,6 @@ public class Field implements IndexableField { if (!(fieldsData instanceof Short)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Short"); } - if (numericTokenStream != null) { - numericTokenStream.setIntValue(value); - } fieldsData = Short.valueOf(value); } @@ -303,9 +297,6 @@ public class Field implements IndexableField { if (!(fieldsData instanceof Integer)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Integer"); } - if (numericTokenStream != null) { - numericTokenStream.setIntValue(value); - } fieldsData = Integer.valueOf(value); } @@ -313,9 +304,6 @@ public class Field implements IndexableField { if (!(fieldsData instanceof Long)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Long"); } - if (numericTokenStream != null) { - numericTokenStream.setLongValue(value); - } fieldsData = Long.valueOf(value); } @@ -323,9 +311,6 @@ public class Field implements IndexableField { if (!(fieldsData instanceof Float)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Float"); } - if (numericTokenStream != null) { - numericTokenStream.setFloatValue(value); - } fieldsData = Float.valueOf(value); } @@ -333,9 +318,6 @@ public class Field implements IndexableField { if (!(fieldsData instanceof Double)) { throw new IllegalArgumentException("cannot change value type from " + fieldsData.getClass().getSimpleName() + " to Double"); } - if (numericTokenStream != null) { - numericTokenStream.setDoubleValue(value); - } fieldsData = Double.valueOf(value); } @@ -433,62 +415,44 @@ public class Field implements IndexableField { final NumericType numericType = fieldType().numericType(); if (numericType != null) { - if (numericTokenStream == null) { + if (!(internalTokenStream instanceof NumericTokenStream)) { // lazy init the TokenStream as it is heavy to instantiate // (attributes,...) if not needed (stored field loading) - numericTokenStream = new NumericTokenStream(type.numericPrecisionStep()); - // initialize value in TokenStream - final Number val = (Number) fieldsData; - switch (numericType) { - case INT: - numericTokenStream.setIntValue(val.intValue()); - break; - case LONG: - numericTokenStream.setLongValue(val.longValue()); - break; - case FLOAT: - numericTokenStream.setFloatValue(val.floatValue()); - break; - case DOUBLE: - numericTokenStream.setDoubleValue(val.doubleValue()); - break; - default: - assert false : "Should never get here"; - } - } else { - // OK -- previously cached and we already updated if - // setters were called. + internalTokenStream = new NumericTokenStream(type.numericPrecisionStep()); } - - return numericTokenStream; + final NumericTokenStream nts = (NumericTokenStream) internalTokenStream; + // initialize value in TokenStream + final Number val = (Number) fieldsData; + switch (numericType) { + case INT: + nts.setIntValue(val.intValue()); + break; + case LONG: + nts.setLongValue(val.longValue()); + break; + case FLOAT: + nts.setFloatValue(val.floatValue()); + break; + case DOUBLE: + nts.setDoubleValue(val.doubleValue()); + break; + default: + assert false : "Should never get here"; + } + return internalTokenStream; } if (!fieldType().tokenized()) { if (stringValue() == null) { throw new IllegalArgumentException("Non-Tokenized Fields must have a String value"); } - - return new TokenStream() { - CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); - OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); - boolean used; - - @Override - public boolean incrementToken() { - if (used) { - return false; - } - termAttribute.setEmpty().append(stringValue()); - offsetAttribute.setOffset(0, stringValue().length()); - used = true; - return true; - } - - @Override - public void reset() { - used = false; - } - }; + if (!(internalTokenStream instanceof StringTokenStream)) { + // lazy init the TokenStream as it is heavy to instantiate + // (attributes,...) if not needed (stored field loading) + internalTokenStream = new StringTokenStream(); + } + ((StringTokenStream) internalTokenStream).setValue(stringValue()); + return internalTokenStream; } if (tokenStream != null) { @@ -501,6 +465,48 @@ public class Field implements IndexableField { throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value"); } + + static final class StringTokenStream extends TokenStream { + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); + private boolean used = false; + private String value = null; + + /** Creates a new TokenStream that returns a String as single token. + *

Warning: Does not initialize the value, you must call + * {@link #setValue()} afterwards! + */ + StringTokenStream() { + } + + /** Sets the string value. */ + void setValue(String value) { + this.value = value; + } + + @Override + public boolean incrementToken() { + if (used) { + return false; + } + clearAttributes(); + termAttribute.append(value); + offsetAttribute.setOffset(0, value.length()); + used = true; + return true; + } + + @Override + public void end() { + final int finalOffset = value.length(); + offsetAttribute.setOffset(finalOffset, finalOffset); + } + + @Override + public void reset() { + used = false; + } + } /** Specifies whether and how a field should be stored. */ public static enum Store {