mirror of https://github.com/apache/lucene.git
LUCENE-2413: Move PerFieldAnalyzerWrapper and LengthFilter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@940632 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1a9fab6982
commit
5f68c89e2d
|
@ -8,6 +8,8 @@ Changes in backwards compatibility policy
|
||||||
- o.a.l.analysis.PorterStemFilter -> o.a.l.analysis.en.PorterStemFilter
|
- o.a.l.analysis.PorterStemFilter -> o.a.l.analysis.en.PorterStemFilter
|
||||||
- o.a.l.analysis.ASCIIFoldingFilter -> o.a.l.analysis.miscellaneous.ASCIIFoldingFilter
|
- o.a.l.analysis.ASCIIFoldingFilter -> o.a.l.analysis.miscellaneous.ASCIIFoldingFilter
|
||||||
- o.a.l.analysis.ISOLatin1AccentFilter -> o.a.l.analysis.miscellaneous.ISOLatin1AccentFilter
|
- o.a.l.analysis.ISOLatin1AccentFilter -> o.a.l.analysis.miscellaneous.ISOLatin1AccentFilter
|
||||||
|
- o.a.l.analysis.LengthFilter -> o.a.l.analysis.miscellaneous.LengthFilter
|
||||||
|
- o.a.l.analysis.PerFieldAnalyzerWrapper -> o.a.l.analysis.miscellaneous.PerFieldAnalyzerWrapper
|
||||||
... (in progress)
|
... (in progress)
|
||||||
|
|
||||||
* LUCENE-1458, LUCENE-2111, LUCENE-2354: Changes from flexible indexing:
|
* LUCENE-1458, LUCENE-2111, LUCENE-2354: Changes from flexible indexing:
|
||||||
|
|
|
@ -56,6 +56,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
|
||||||
searcher = new IndexSearcher(directory, true);
|
searcher = new IndexSearcher(directory, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
public void testPerFieldAnalyzer() throws Exception {
|
public void testPerFieldAnalyzer() throws Exception {
|
||||||
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
|
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
|
||||||
analyzer.addAnalyzer("partnum", new KeywordAnalyzer());
|
analyzer.addAnalyzer("partnum", new KeywordAnalyzer());
|
||||||
|
@ -68,6 +69,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
|
||||||
"+partnum:Q36 +space", query.toString("description"));
|
"+partnum:Q36 +space", query.toString("description"));
|
||||||
assertEquals("doc found!", 1, hits.length);
|
assertEquals("doc found!", 1, hits.length);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
public void testMutipleDocument() throws Exception {
|
public void testMutipleDocument() throws Exception {
|
||||||
RAMDirectory dir = new RAMDirectory();
|
RAMDirectory dir = new RAMDirectory();
|
||||||
|
|
|
@ -1,41 +0,0 @@
|
||||||
package org.apache.lucene.analysis;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|
||||||
|
|
||||||
import java.io.StringReader;
|
|
||||||
|
|
||||||
public class TestLengthFilter extends BaseTokenStreamTestCase {
|
|
||||||
|
|
||||||
public void testFilter() throws Exception {
|
|
||||||
TokenStream stream = new WhitespaceTokenizer(
|
|
||||||
new StringReader("short toolong evenmuchlongertext a ab toolong foo"));
|
|
||||||
LengthFilter filter = new LengthFilter(stream, 2, 6);
|
|
||||||
TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
|
|
||||||
|
|
||||||
assertTrue(filter.incrementToken());
|
|
||||||
assertEquals("short", termAtt.term());
|
|
||||||
assertTrue(filter.incrementToken());
|
|
||||||
assertEquals("ab", termAtt.term());
|
|
||||||
assertTrue(filter.incrementToken());
|
|
||||||
assertEquals("foo", termAtt.term());
|
|
||||||
assertFalse(filter.incrementToken());
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
|
@ -1,48 +0,0 @@
|
||||||
package org.apache.lucene.analysis;
|
|
||||||
|
|
||||||
import java.io.StringReader;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
public class TestPerFieldAnalzyerWrapper extends BaseTokenStreamTestCase {
|
|
||||||
public void testPerField() throws Exception {
|
|
||||||
String text = "Qwerty";
|
|
||||||
PerFieldAnalyzerWrapper analyzer =
|
|
||||||
new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer());
|
|
||||||
analyzer.addAnalyzer("special", new SimpleAnalyzer());
|
|
||||||
|
|
||||||
TokenStream tokenStream = analyzer.tokenStream("field",
|
|
||||||
new StringReader(text));
|
|
||||||
TermAttribute termAtt = tokenStream.getAttribute(TermAttribute.class);
|
|
||||||
|
|
||||||
assertTrue(tokenStream.incrementToken());
|
|
||||||
assertEquals("WhitespaceAnalyzer does not lowercase",
|
|
||||||
"Qwerty",
|
|
||||||
termAtt.term());
|
|
||||||
|
|
||||||
tokenStream = analyzer.tokenStream("special",
|
|
||||||
new StringReader(text));
|
|
||||||
termAtt = tokenStream.getAttribute(TermAttribute.class);
|
|
||||||
assertTrue(tokenStream.incrementToken());
|
|
||||||
assertEquals("SimpleAnalyzer lowercases",
|
|
||||||
"qwerty",
|
|
||||||
termAtt.term());
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.analysis;
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -19,6 +19,8 @@ package org.apache.lucene.analysis;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
|
||||||
/**
|
/**
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.analysis;
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -17,6 +17,8 @@ package org.apache.lucene.analysis;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.document.Fieldable;
|
||||||
|
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
|
@ -1,4 +1,4 @@
|
||||||
package org.apache.lucene.analysis;
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
@ -17,6 +17,7 @@ package org.apache.lucene.analysis;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
package org.apache.lucene.analysis;
|
package org.apache.lucene.analysis.miscellaneous;
|
||||||
|
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.*;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
|
||||||
/**
|
/**
|
|
@ -56,6 +56,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
|
||||||
searcher = new IndexSearcher(directory, true);
|
searcher = new IndexSearcher(directory, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
public void testPerFieldAnalyzer() throws Exception {
|
public void testPerFieldAnalyzer() throws Exception {
|
||||||
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(TEST_VERSION_CURRENT));
|
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer(TEST_VERSION_CURRENT));
|
||||||
analyzer.addAnalyzer("partnum", new KeywordAnalyzer());
|
analyzer.addAnalyzer("partnum", new KeywordAnalyzer());
|
||||||
|
@ -68,6 +69,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
|
||||||
"+partnum:Q36 +space", query.toString("description"));
|
"+partnum:Q36 +space", query.toString("description"));
|
||||||
assertEquals("doc found!", 1, hits.length);
|
assertEquals("doc found!", 1, hits.length);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
public void testMutipleDocument() throws Exception {
|
public void testMutipleDocument() throws Exception {
|
||||||
RAMDirectory dir = new RAMDirectory();
|
RAMDirectory dir = new RAMDirectory();
|
||||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.collation;
|
||||||
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
|
|
||||||
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
@ -40,6 +39,7 @@ import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.util.IndexableBinaryStringTools;
|
import org.apache.lucene.util.IndexableBinaryStringTools;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
import java.io.StringReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
public abstract class CollationTestBase extends LuceneTestCase {
|
public abstract class CollationTestBase extends LuceneTestCase {
|
||||||
|
@ -172,14 +172,8 @@ public abstract class CollationTestBase extends LuceneTestCase {
|
||||||
Analyzer denmarkAnalyzer,
|
Analyzer denmarkAnalyzer,
|
||||||
String usResult) throws Exception {
|
String usResult) throws Exception {
|
||||||
RAMDirectory indexStore = new RAMDirectory();
|
RAMDirectory indexStore = new RAMDirectory();
|
||||||
PerFieldAnalyzerWrapper analyzer
|
|
||||||
= new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
|
|
||||||
analyzer.addAnalyzer("US", usAnalyzer);
|
|
||||||
analyzer.addAnalyzer("France", franceAnalyzer);
|
|
||||||
analyzer.addAnalyzer("Sweden", swedenAnalyzer);
|
|
||||||
analyzer.addAnalyzer("Denmark", denmarkAnalyzer);
|
|
||||||
IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(
|
IndexWriter writer = new IndexWriter(indexStore, new IndexWriterConfig(
|
||||||
TEST_VERSION_CURRENT, analyzer));
|
TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)));
|
||||||
|
|
||||||
// document data:
|
// document data:
|
||||||
// the tracer field is used to determine which document was hit
|
// the tracer field is used to determine which document was hit
|
||||||
|
@ -204,17 +198,13 @@ public abstract class CollationTestBase extends LuceneTestCase {
|
||||||
doc.add(new Field("contents", sortData[i][1],
|
doc.add(new Field("contents", sortData[i][1],
|
||||||
Field.Store.NO, Field.Index.ANALYZED));
|
Field.Store.NO, Field.Index.ANALYZED));
|
||||||
if (sortData[i][2] != null)
|
if (sortData[i][2] != null)
|
||||||
doc.add(new Field("US", sortData[i][2],
|
doc.add(new Field("US", usAnalyzer.reusableTokenStream("US", new StringReader(sortData[i][2]))));
|
||||||
Field.Store.NO, Field.Index.ANALYZED));
|
|
||||||
if (sortData[i][3] != null)
|
if (sortData[i][3] != null)
|
||||||
doc.add(new Field("France", sortData[i][3],
|
doc.add(new Field("France", franceAnalyzer.reusableTokenStream("France", new StringReader(sortData[i][3]))));
|
||||||
Field.Store.NO, Field.Index.ANALYZED));
|
|
||||||
if (sortData[i][4] != null)
|
if (sortData[i][4] != null)
|
||||||
doc.add(new Field("Sweden", sortData[i][4],
|
doc.add(new Field("Sweden", swedenAnalyzer.reusableTokenStream("Sweden", new StringReader(sortData[i][4]))));
|
||||||
Field.Store.NO, Field.Index.ANALYZED));
|
|
||||||
if (sortData[i][5] != null)
|
if (sortData[i][5] != null)
|
||||||
doc.add(new Field("Denmark", sortData[i][5],
|
doc.add(new Field("Denmark", denmarkAnalyzer.reusableTokenStream("Denmark", new StringReader(sortData[i][5]))));
|
||||||
Field.Store.NO, Field.Index.ANALYZED));
|
|
||||||
writer.addDocument(doc);
|
writer.addDocument(doc);
|
||||||
}
|
}
|
||||||
writer.optimize();
|
writer.optimize();
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
package org.apache.solr.analysis;
|
package org.apache.solr.analysis;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.LengthFilter;
|
import org.apache.lucene.analysis.miscellaneous.LengthFilter;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue