upgrade to lucene 5 snapshot (will open issue about collators)

This commit is contained in:
Robert Muir 2014-11-05 16:25:33 -05:00
parent 472c21a138
commit c2c0345837
7 changed files with 18 additions and 410 deletions

View File

@ -41,7 +41,7 @@ Normalizes characters as explained [here](http://userguide.icu-project.org/trans
"index" : {
"analysis" : {
"analyzer" : {
"collation" : {
"normalized" : {
"tokenizer" : "keyword",
"filter" : ["icu_normalizer"]
}
@ -61,7 +61,7 @@ Folding of unicode characters based on `UTR#30`. It registers itself under `icu_
"index" : {
"analysis" : {
"analyzer" : {
"collation" : {
"folded" : {
"tokenizer" : "keyword",
"filter" : ["icu_folding"]
}
@ -101,81 +101,6 @@ The Following example exempts Swedish characters from the folding. Note that the
}
```
ICU Collation
-------------
Uses collation token filter. Allows to either specify the rules for collation
(defined [here](http://www.icu-project.org/userguide/Collate_Customization.html)) using the `rules` parameter
(can point to a location or expressed in the settings, location can be relative to config location), or using the
`language` parameter (further specialized by country and variant). By default it registers under `icu_collation` or
`icuCollation` and uses the default locale.
Here is a sample settings:
```js
{
"index" : {
"analysis" : {
"analyzer" : {
"collation" : {
"tokenizer" : "keyword",
"filter" : ["icu_collation"]
}
}
}
}
}
```
And here is a sample of custom collation:
```js
{
"index" : {
"analysis" : {
"analyzer" : {
"collation" : {
"tokenizer" : "keyword",
"filter" : ["myCollator"]
}
},
"filter" : {
"myCollator" : {
"type" : "icu_collation",
"language" : "en"
}
}
}
}
}
```
Optional options:
* `strength` - The strength property determines the minimum level of difference considered significant during comparison.
The default strength for the Collator is `tertiary`, unless specified otherwise by the locale used to create the Collator.
Possible values: `primary`, `secondary`, `tertiary`, `quaternary` or `identical`.
See [ICU Collation](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/Collator.html) documentation for a more detailed
explanation for the specific values.
* `decomposition` - Possible values: `no` or `canonical`. Defaults to `no`. Setting this decomposition property with
`canonical` allows the Collator to handle un-normalized text properly, producing the same results as if the text were
normalized. If `no` is set, it is the user's responsibility to ensure that all text is already in the appropriate form
before a comparison or before getting a CollationKey. Adjusting decomposition mode allows the user to select between
faster and more complete collation behavior. Since a great many of the world's languages do not require text
normalization, most locales set `no` as the default decomposition mode.
Expert options:
* `alternate` - Possible values: `shifted` or `non-ignorable`. Sets the alternate handling for strength `quaternary`
to be either shifted or non-ignorable. This boils down to ignoring punctuation and whitespace.
* `caseLevel` - Possible values: `true` or `false`. Default is `false`. Whether case level sorting is required. When
strength is set to `primary` this will ignore accent differences.
* `caseFirst` - Possible values: `lower` or `upper`. Useful to control which case is sorted first when case is not ignored
for strength `tertiary`.
* `numeric` - Possible values: `true` or `false`. Whether digits are sorted according to numeric representation. For
example the value `egg-9` is sorted before the value `egg-21`. Defaults to `false`.
* `variableTop` - Single character or contraction. Controls what is variable for `alternate`.
* `hiraganaQuaternaryMode` - Possible values: `true` or `false`. Defaults to `false`. Distinguishing between Katakana
and Hiragana characters in `quaternary` strength.
ICU Tokenizer
-------------
@ -186,7 +111,7 @@ Breaks text into words according to [UAX #29: Unicode Text Segmentation](http://
"index" : {
"analysis" : {
"analyzer" : {
"collation" : {
"tokenized" : {
"tokenizer" : "icu_tokenizer",
}
}
@ -211,7 +136,7 @@ Here is a sample settings:
"index" : {
"analysis" : {
"analyzer" : {
"collation" : {
"normalized" : {
"tokenizer" : "keyword",
"char_filter" : ["icu_normalizer"]
}

View File

@ -33,8 +33,8 @@
<properties>
<elasticsearch.version>2.0.0-SNAPSHOT</elasticsearch.version>
<lucene.version>4.10.2</lucene.version>
<lucene.maven.version>4.10.2</lucene.maven.version>
<lucene.version>5.0.0</lucene.version>
<lucene.maven.version>5.0.0-snapshot-1636426</lucene.maven.version>
<tests.jvms>1</tests.jvms>
<tests.shuffle>true</tests.shuffle>
<tests.output>onerror</tests.output>
@ -47,6 +47,10 @@
<id>sonatype</id>
<url>http://oss.sonatype.org/content/repositories/releases/</url>
</repository>
<repository>
<id>Lucene snapshots</id>
<url>https://download.elasticsearch.org/lucenesnapshots/maven/</url>
</repository>
</repositories>
<dependencies>

View File

@ -23,7 +23,6 @@ import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.collation.ICUCollationKeyFilter;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
@ -174,6 +173,7 @@ public class IcuCollationTokenFilterFactory extends AbstractTokenFilterFactory {
@Override
public TokenStream create(TokenStream tokenStream) {
    // ICUCollationKeyFilter was deprecated in Lucene 4 and removed in Lucene 5,
    // so this factory can no longer produce a collation filter.
    // TODO: lucene does sort keys as binary keys since 4.x
    throw new UnsupportedOperationException("i was deprecated in lucene 4, and now i'm gone");
}
}

View File

@ -39,8 +39,8 @@ public class IcuTokenizerFactory extends AbstractTokenizerFactory {
}
@Override
public Tokenizer create(Reader reader) {
return new ICUTokenizer(reader);
public Tokenizer create() {
return new ICUTokenizer();
}
}

View File

@ -19,7 +19,6 @@
package org.elasticsearch.indices.analysis;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.Transliterator;
import org.apache.lucene.analysis.TokenStream;
@ -27,7 +26,6 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.icu.ICUFoldingFilter;
import org.apache.lucene.analysis.icu.ICUTransformFilter;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.collation.ICUCollationKeyFilter;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
@ -36,8 +34,6 @@ import org.elasticsearch.index.analysis.PreBuiltTokenizerFactoryFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import java.io.Reader;
/**
* Registers indices level analysis components so, if not explicitly configured, will be shared
* among all indices.
@ -55,8 +51,8 @@ public class IcuIndicesAnalysis extends AbstractComponent {
}
@Override
public Tokenizer create() {
    // Lucene 5 tokenizers no longer take a Reader in the constructor;
    // the analysis chain supplies input via setReader() later.
    return new ICUTokenizer();
}
}));
@ -85,18 +81,6 @@ public class IcuIndicesAnalysis extends AbstractComponent {
}
}));
// NOTE(review): this registration is removed by this commit — ICUCollationKeyFilter
// no longer exists in Lucene 5 (its import is also removed above), so this code
// cannot compile; collation needs to be reimplemented on binary sort keys.
indicesAnalysisService.tokenFilterFactories().put("icu_collation", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {
return "icu_collation";
}
@Override
public TokenStream create(TokenStream tokenStream) {
// Default-locale collator; per-language configuration happened in IcuCollationTokenFilterFactory.
return new ICUCollationKeyFilter(tokenStream, Collator.getInstance());
}
}));
indicesAnalysisService.tokenFilterFactories().put("icu_transform", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@Override
public String name() {

View File

@ -52,10 +52,8 @@ public class ICUIntegrationTests extends ElasticsearchIntegrationTest {
Settings settings = ImmutableSettings.builder()
.put(super.indexSettings())
.put("index.analysis.analyzer.my_analyzer.tokenizer", "standard")
.putArray("index.analysis.analyzer.my_analyzer.filter", "standard", "lowercase", "my_collator")
.put("index.analysis.filter.my_collator.type", "icu_collation")
.put("index.analysis.filter.my_collator.language", "en")
.put("index.analysis.filter.my_collator.strength", "primary")
.putArray("index.analysis.analyzer.my_analyzer.filter", "standard", "my_folding")
.put("index.analysis.filter.my_folding.type", "icu_folding")
.build();
return settings;

View File

@ -1,303 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.analysis;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.util.ULocale;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import java.io.IOException;
import java.io.StringReader;
import static org.elasticsearch.index.analysis.AnalysisTestUtils.createAnalysisService;
import static org.hamcrest.Matchers.equalTo;
// Tests borrowed from Solr's Icu collation key filter factory test.
public class SimpleIcuCollationTokenFilterTests extends ElasticsearchTestCase {

    /*
     * Turkish has some funny casing.
     * This test shows how you can solve this kind of thing easily with collation.
     * Instead of using LowerCaseFilter, use a turkish collator with primary strength.
     * Then things will sort and match correctly.
     */
    @Test
    public void testBasicUsage() throws Exception {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("index.analysis.filter.myCollator.type", "icu_collation")
                .put("index.analysis.filter.myCollator.language", "tr")
                .put("index.analysis.filter.myCollator.strength", "primary")
                .build();
        AnalysisService analysisService = createAnalysisService(settings);

        String turkishUpperCase = "I WİLL USE TURKİSH CASING";
        String turkishLowerCase = "ı will use turkish casıng";
        TokenFilterFactory filterFactory = analysisService.tokenFilter("myCollator");
        TokenStream tsUpper = filterFactory.create(new KeywordTokenizer(new StringReader(turkishUpperCase)));
        TokenStream tsLower = filterFactory.create(new KeywordTokenizer(new StringReader(turkishLowerCase)));
        assertCollatesToSame(tsUpper, tsLower);
    }

    /*
     * Test usage of the decomposition option for unicode normalization.
     * The upper-case input uses a decomposed I + combining dot above (\u0049\u0307),
     * which only matches the precomposed form when decomposition is "canonical".
     */
    @Test
    public void testNormalization() throws IOException {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("index.analysis.filter.myCollator.type", "icu_collation")
                .put("index.analysis.filter.myCollator.language", "tr")
                .put("index.analysis.filter.myCollator.strength", "primary")
                .put("index.analysis.filter.myCollator.decomposition", "canonical")
                .build();
        AnalysisService analysisService = createAnalysisService(settings);

        String turkishUpperCase = "I W\u0049\u0307LL USE TURKİSH CASING";
        String turkishLowerCase = "ı will use turkish casıng";
        TokenFilterFactory filterFactory = analysisService.tokenFilter("myCollator");
        TokenStream tsUpper = filterFactory.create(new KeywordTokenizer(new StringReader(turkishUpperCase)));
        TokenStream tsLower = filterFactory.create(new KeywordTokenizer(new StringReader(turkishLowerCase)));
        assertCollatesToSame(tsUpper, tsLower);
    }

    /*
     * Test secondary strength, for english case is not significant.
     */
    @Test
    public void testSecondaryStrength() throws IOException {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("index.analysis.filter.myCollator.type", "icu_collation")
                .put("index.analysis.filter.myCollator.language", "en")
                .put("index.analysis.filter.myCollator.strength", "secondary")
                .put("index.analysis.filter.myCollator.decomposition", "no")
                .build();
        AnalysisService analysisService = createAnalysisService(settings);

        String upperCase = "TESTING";
        String lowerCase = "testing";
        TokenFilterFactory filterFactory = analysisService.tokenFilter("myCollator");
        TokenStream tsUpper = filterFactory.create(new KeywordTokenizer(new StringReader(upperCase)));
        TokenStream tsLower = filterFactory.create(new KeywordTokenizer(new StringReader(lowerCase)));
        assertCollatesToSame(tsUpper, tsLower);
    }

    /*
     * Setting alternate=shifted to shift whitespace, punctuation and symbols
     * to quaternary level
     */
    @Test
    public void testIgnorePunctuation() throws IOException {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("index.analysis.filter.myCollator.type", "icu_collation")
                .put("index.analysis.filter.myCollator.language", "en")
                .put("index.analysis.filter.myCollator.strength", "primary")
                .put("index.analysis.filter.myCollator.alternate", "shifted")
                .build();
        AnalysisService analysisService = createAnalysisService(settings);

        String withPunctuation = "foo-bar";
        String withoutPunctuation = "foo bar";
        TokenFilterFactory filterFactory = analysisService.tokenFilter("myCollator");
        TokenStream tsPunctuation = filterFactory.create(new KeywordTokenizer(new StringReader(withPunctuation)));
        TokenStream tsWithoutPunctuation = filterFactory.create(new KeywordTokenizer(new StringReader(withoutPunctuation)));
        assertCollatesToSame(tsPunctuation, tsWithoutPunctuation);
    }

    /*
     * Setting alternate=shifted and variableTop to shift whitespace, but not
     * punctuation or symbols, to quaternary level
     */
    @Test
    public void testIgnoreWhitespace() throws IOException {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("index.analysis.filter.myCollator.type", "icu_collation")
                .put("index.analysis.filter.myCollator.language", "en")
                .put("index.analysis.filter.myCollator.strength", "primary")
                .put("index.analysis.filter.myCollator.alternate", "shifted")
                .put("index.analysis.filter.myCollator.variableTop", " ")
                .build();
        AnalysisService analysisService = createAnalysisService(settings);

        String withSpace = "foo bar";
        String withoutSpace = "foobar";
        String withPunctuation = "foo-bar";
        TokenFilterFactory filterFactory = analysisService.tokenFilter("myCollator");
        TokenStream tsWithSpace = filterFactory.create(new KeywordTokenizer(new StringReader(withSpace)));
        TokenStream tsWithoutSpace = filterFactory.create(new KeywordTokenizer(new StringReader(withoutSpace)));
        assertCollatesToSame(tsWithSpace, tsWithoutSpace);
        // now assert that punctuation still matters: foo-bar < foo bar
        tsWithSpace = filterFactory.create(new KeywordTokenizer(new StringReader(withSpace)));
        TokenStream tsWithPunctuation = filterFactory.create(new KeywordTokenizer(new StringReader(withPunctuation)));
        assertCollation(tsWithPunctuation, tsWithSpace, -1);
    }

    /*
     * Setting numeric to encode digits with numeric value, so that
     * foobar-9 sorts before foobar-10
     */
    @Test
    public void testNumerics() throws IOException {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("index.analysis.filter.myCollator.type", "icu_collation")
                .put("index.analysis.filter.myCollator.language", "en")
                .put("index.analysis.filter.myCollator.numeric", "true")
                .build();
        AnalysisService analysisService = createAnalysisService(settings);

        String nine = "foobar-9";
        String ten = "foobar-10";
        TokenFilterFactory filterFactory = analysisService.tokenFilter("myCollator");
        TokenStream tsNine = filterFactory.create(new KeywordTokenizer(new StringReader(nine)));
        TokenStream tsTen = filterFactory.create(new KeywordTokenizer(new StringReader(ten)));
        assertCollation(tsNine, tsTen, -1);
    }

    /*
     * Setting caseLevel=true to create an additional case level between
     * secondary and tertiary
     */
    @Test
    public void testIgnoreAccentsButNotCase() throws IOException {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("index.analysis.filter.myCollator.type", "icu_collation")
                .put("index.analysis.filter.myCollator.language", "en")
                .put("index.analysis.filter.myCollator.strength", "primary")
                .put("index.analysis.filter.myCollator.caseLevel", "true")
                .build();
        AnalysisService analysisService = createAnalysisService(settings);

        String withAccents = "résumé";
        String withoutAccents = "resume";
        String withAccentsUpperCase = "Résumé";
        String withoutAccentsUpperCase = "Resume";
        TokenFilterFactory filterFactory = analysisService.tokenFilter("myCollator");
        TokenStream tsWithAccents = filterFactory.create(new KeywordTokenizer(new StringReader(withAccents)));
        TokenStream tsWithoutAccents = filterFactory.create(new KeywordTokenizer(new StringReader(withoutAccents)));
        assertCollatesToSame(tsWithAccents, tsWithoutAccents);
        TokenStream tsWithAccentsUpperCase = filterFactory.create(new KeywordTokenizer(new StringReader(withAccentsUpperCase)));
        TokenStream tsWithoutAccentsUpperCase = filterFactory.create(new KeywordTokenizer(new StringReader(withoutAccentsUpperCase)));
        assertCollatesToSame(tsWithAccentsUpperCase, tsWithoutAccentsUpperCase);
        // now assert that case still matters: resume < Resume
        TokenStream tsLower = filterFactory.create(new KeywordTokenizer(new StringReader(withoutAccents)));
        TokenStream tsUpper = filterFactory.create(new KeywordTokenizer(new StringReader(withoutAccentsUpperCase)));
        assertCollation(tsLower, tsUpper, -1);
    }

    /*
     * Setting caseFirst=upper to cause uppercase strings to sort
     * before lowercase ones.
     */
    @Test
    public void testUpperCaseFirst() throws IOException {
        Settings settings = ImmutableSettings.settingsBuilder()
                .put("index.analysis.filter.myCollator.type", "icu_collation")
                .put("index.analysis.filter.myCollator.language", "en")
                .put("index.analysis.filter.myCollator.strength", "tertiary")
                .put("index.analysis.filter.myCollator.caseFirst", "upper")
                .build();
        AnalysisService analysisService = createAnalysisService(settings);

        String lower = "resume";
        String upper = "Resume";
        TokenFilterFactory filterFactory = analysisService.tokenFilter("myCollator");
        TokenStream tsLower = filterFactory.create(new KeywordTokenizer(new StringReader(lower)));
        TokenStream tsUpper = filterFactory.create(new KeywordTokenizer(new StringReader(upper)));
        assertCollation(tsUpper, tsLower, -1);
    }

    /*
     * For german, you might want oe to sort and match with o umlaut.
     * This is not the default, but you can make a customized ruleset to do this.
     *
     * The default is DIN 5007-1, this shows how to tailor a collator to get DIN 5007-2 behavior.
     * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4423383
     */
    @Test
    public void testCustomRules() throws Exception {
        RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
        // Tailor ae/oe/ue (and their uppercase forms) to match the umlauted vowels.
        // Fixed: the UE rule previously mapped to lowercase u\u0308 instead of U\u0308.
        String DIN5007_2_tailorings =
                "& ae , a\u0308 & AE , A\u0308"+
                "& oe , o\u0308 & OE , O\u0308"+
                "& ue , u\u0308 & UE , U\u0308";
        RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
        String tailoredRules = tailoredCollator.getRules();

        Settings settings = ImmutableSettings.settingsBuilder()
                .put("index.analysis.filter.myCollator.type", "icu_collation")
                .put("index.analysis.filter.myCollator.rules", tailoredRules)
                .put("index.analysis.filter.myCollator.strength", "primary")
                .build();
        AnalysisService analysisService = createAnalysisService(settings);

        String germanUmlaut = "Töne";
        String germanOE = "Toene";
        TokenFilterFactory filterFactory = analysisService.tokenFilter("myCollator");
        TokenStream tsUmlaut = filterFactory.create(new KeywordTokenizer(new StringReader(germanUmlaut)));
        TokenStream tsOE = filterFactory.create(new KeywordTokenizer(new StringReader(germanOE)));
        assertCollatesToSame(tsUmlaut, tsOE);
    }

    /** Asserts that both single-token streams produce equal collation terms. */
    private void assertCollatesToSame(TokenStream stream1, TokenStream stream2) throws IOException {
        assertCollation(stream1, stream2, 0);
    }

    /**
     * Consumes exactly one token from each stream and asserts that the terms
     * compare with the given sign (-1, 0, or 1), then asserts both streams are exhausted.
     */
    private void assertCollation(TokenStream stream1, TokenStream stream2, int comparison) throws IOException {
        CharTermAttribute term1 = stream1.addAttribute(CharTermAttribute.class);
        CharTermAttribute term2 = stream2.addAttribute(CharTermAttribute.class);
        stream1.reset();
        stream2.reset();
        assertThat(stream1.incrementToken(), equalTo(true));
        assertThat(stream2.incrementToken(), equalTo(true));
        assertThat(Integer.signum(term1.toString().compareTo(term2.toString())), equalTo(Integer.signum(comparison)));
        assertThat(stream1.incrementToken(), equalTo(false));
        assertThat(stream2.incrementToken(), equalTo(false));
    }
}