mirror of https://github.com/apache/lucene.git
LUCENE-3071: Add ReversePathHierarchyTokenizer and enable skip on PathHierarchyTokenizer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1099999 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6af8928c95
commit
96878534a0
|
@ -29,53 +29,67 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
|||
* Take something like:
|
||||
*
|
||||
* <pre>
|
||||
* /soemthing/something/else
|
||||
* /something/something/else
|
||||
* </pre>
|
||||
*
|
||||
* and make:
|
||||
*
|
||||
* <pre>
|
||||
* /soemthing
|
||||
* /soemthing/something
|
||||
* /soemthing/something/else
|
||||
* /something
|
||||
* /something/something
|
||||
* /something/something/else
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
public class PathHierarchyTokenizer extends Tokenizer {
|
||||
|
||||
public PathHierarchyTokenizer(Reader input) {
|
||||
this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER);
|
||||
this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
|
||||
}
|
||||
|
||||
public PathHierarchyTokenizer(Reader input, int skip) {
|
||||
this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip);
|
||||
}
|
||||
|
||||
public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) {
|
||||
this(input, bufferSize, delimiter, delimiter);
|
||||
this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP);
|
||||
}
|
||||
|
||||
public PathHierarchyTokenizer(Reader input, char delimiter, char replacement) {
|
||||
this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement);
|
||||
this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP);
|
||||
}
|
||||
|
||||
public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) {
|
||||
public PathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) {
|
||||
this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
|
||||
}
|
||||
|
||||
public PathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
|
||||
super(input);
|
||||
termAtt.resizeBuffer(bufferSize);
|
||||
|
||||
this.delimiter = delimiter;
|
||||
this.replacement = replacement;
|
||||
endDelimiter = false;
|
||||
this.skip = skip;
|
||||
resultToken = new StringBuilder(bufferSize);
|
||||
}
|
||||
|
||||
private static final int DEFAULT_BUFFER_SIZE = 1024;
|
||||
public static final char DEFAULT_DELIMITER = '/';
|
||||
public static final int DEFAULT_SKIP = 0;
|
||||
|
||||
private final char delimiter;
|
||||
private final char replacement;
|
||||
private final int skip;
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
private int startPosition = 0;
|
||||
private int finalOffset = 0;
|
||||
private boolean endDelimiter;
|
||||
private int skipped = 0;
|
||||
private boolean endDelimiter = false;
|
||||
private StringBuilder resultToken;
|
||||
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
clearAttributes();
|
||||
|
@ -98,37 +112,63 @@ public class PathHierarchyTokenizer extends Tokenizer {
|
|||
while (true) {
|
||||
int c = input.read();
|
||||
if( c < 0 ){
|
||||
if( skipped > skip ) {
|
||||
length += resultToken.length();
|
||||
termAtt.setLength(length);
|
||||
finalOffset = correctOffset(length);
|
||||
offsetAtt.setOffset(correctOffset(0), finalOffset);
|
||||
finalOffset = correctOffset(startPosition + length);
|
||||
offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
|
||||
if( added ){
|
||||
resultToken.setLength(0);
|
||||
resultToken.append(termAtt.buffer(), 0, length);
|
||||
}
|
||||
return added;
|
||||
}
|
||||
else{
|
||||
finalOffset = correctOffset(startPosition + length);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if( !added ){
|
||||
added = true;
|
||||
skipped++;
|
||||
if( skipped > skip ){
|
||||
termAtt.append(c == delimiter ? replacement : (char)c);
|
||||
length++;
|
||||
}
|
||||
else {
|
||||
startPosition++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if( c == delimiter ){
|
||||
if( length > 0 ){
|
||||
if( skipped > skip ){
|
||||
endDelimiter = true;
|
||||
break;
|
||||
}
|
||||
else{
|
||||
skipped++;
|
||||
if( skipped > skip ){
|
||||
termAtt.append(replacement);
|
||||
length++;
|
||||
}
|
||||
else {
|
||||
startPosition++;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if( skipped > skip ){
|
||||
termAtt.append((char)c);
|
||||
length++;
|
||||
}
|
||||
else {
|
||||
startPosition++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
length += resultToken.length();
|
||||
termAtt.setLength(length);
|
||||
finalOffset = correctOffset(length);
|
||||
offsetAtt.setOffset(correctOffset(0), finalOffset);
|
||||
finalOffset = correctOffset(startPosition + length);
|
||||
offsetAtt.setOffset(correctOffset(startPosition), finalOffset);
|
||||
resultToken.setLength(0);
|
||||
resultToken.append(termAtt.buffer(), 0, length);
|
||||
return true;
|
||||
|
@ -146,5 +186,6 @@ public class PathHierarchyTokenizer extends Tokenizer {
|
|||
resultToken.setLength(0);
|
||||
finalOffset = 0;
|
||||
endDelimiter = false;
|
||||
skipped = 0;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,173 @@
|
|||
package org.apache.lucene.analysis.path;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
||||
/**
|
||||
*
|
||||
* Take something like:
|
||||
*
|
||||
* <pre>
|
||||
* www.site.co.uk
|
||||
* </pre>
|
||||
*
|
||||
* and make:
|
||||
*
|
||||
* <pre>
|
||||
* www.site.co.uk
|
||||
* site.co.uk
|
||||
* co.uk
|
||||
* uk
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
public class ReversePathHierarchyTokenizer extends Tokenizer {
|
||||
|
||||
public ReversePathHierarchyTokenizer(Reader input) {
|
||||
this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
|
||||
}
|
||||
|
||||
public ReversePathHierarchyTokenizer(Reader input, int skip) {
|
||||
this(input, DEFAULT_BUFFER_SIZE, DEFAULT_DELIMITER, DEFAULT_DELIMITER, skip);
|
||||
}
|
||||
|
||||
public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter) {
|
||||
this(input, bufferSize, delimiter, delimiter, DEFAULT_SKIP);
|
||||
}
|
||||
|
||||
public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement) {
|
||||
this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, DEFAULT_SKIP);
|
||||
}
|
||||
|
||||
public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement) {
|
||||
this(input, bufferSize, delimiter, replacement, DEFAULT_SKIP);
|
||||
}
|
||||
|
||||
public ReversePathHierarchyTokenizer(Reader input, char delimiter, int skip) {
|
||||
this(input, DEFAULT_BUFFER_SIZE, delimiter, delimiter, skip);
|
||||
}
|
||||
|
||||
public ReversePathHierarchyTokenizer(Reader input, char delimiter, char replacement, int skip) {
|
||||
this(input, DEFAULT_BUFFER_SIZE, delimiter, replacement, skip);
|
||||
}
|
||||
|
||||
public ReversePathHierarchyTokenizer(Reader input, int bufferSize, char delimiter, char replacement, int skip) {
|
||||
super(input);
|
||||
termAtt.resizeBuffer(bufferSize);
|
||||
this.delimiter = delimiter;
|
||||
this.replacement = replacement;
|
||||
this.skip = skip;
|
||||
resultToken = new StringBuilder(bufferSize);
|
||||
resultTokenBuffer = new char[bufferSize];
|
||||
delimiterPositions = new ArrayList<Integer>(bufferSize/10);
|
||||
}
|
||||
|
||||
private static final int DEFAULT_BUFFER_SIZE = 1024;
|
||||
public static final char DEFAULT_DELIMITER = '/';
|
||||
public static final int DEFAULT_SKIP = 0;
|
||||
|
||||
private final char delimiter;
|
||||
private final char replacement;
|
||||
private final int skip;
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
private int endPosition = 0;
|
||||
private int finalOffset = 0;
|
||||
private int skipped = 0;
|
||||
private StringBuilder resultToken;
|
||||
|
||||
private List<Integer> delimiterPositions;
|
||||
private int delimitersCount = -1;
|
||||
private char[] resultTokenBuffer;
|
||||
|
||||
@Override
|
||||
public final boolean incrementToken() throws IOException {
|
||||
clearAttributes();
|
||||
if(delimitersCount == -1){
|
||||
int length = 0;
|
||||
delimiterPositions.add(0);
|
||||
while (true) {
|
||||
int c = input.read();
|
||||
if( c < 0 ) {
|
||||
break;
|
||||
}
|
||||
length++;
|
||||
if( c == delimiter ) {
|
||||
delimiterPositions.add(length);
|
||||
resultToken.append(replacement);
|
||||
}
|
||||
else{
|
||||
resultToken.append((char)c);
|
||||
}
|
||||
}
|
||||
delimitersCount = delimiterPositions.size();
|
||||
if( delimiterPositions.get(delimitersCount-1) < length ){
|
||||
delimiterPositions.add(length);
|
||||
delimitersCount++;
|
||||
}
|
||||
if( resultTokenBuffer.length < resultToken.length() ){
|
||||
resultTokenBuffer = new char[resultToken.length()];
|
||||
}
|
||||
resultToken.getChars(0, resultToken.length(), resultTokenBuffer, 0);
|
||||
resultToken.setLength(0);
|
||||
endPosition = delimiterPositions.get(delimitersCount-1 - skip);
|
||||
finalOffset = correctOffset(length);
|
||||
posAtt.setPositionIncrement(1);
|
||||
}
|
||||
else{
|
||||
posAtt.setPositionIncrement(0);
|
||||
}
|
||||
|
||||
while( skipped < delimitersCount-skip-1 ){
|
||||
int start = delimiterPositions.get(skipped);
|
||||
termAtt.copyBuffer(resultTokenBuffer, start, endPosition - start);
|
||||
offsetAtt.setOffset(correctOffset(start), correctOffset(endPosition));
|
||||
skipped++;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void end() {
|
||||
// set final offset
|
||||
offsetAtt.setOffset(finalOffset, finalOffset);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset(Reader input) throws IOException {
|
||||
super.reset(input);
|
||||
resultToken.setLength(0);
|
||||
finalOffset = 0;
|
||||
skipped = 0;
|
||||
delimitersCount = -1;
|
||||
delimiterPositions.clear();
|
||||
}
|
||||
}
|
|
@ -127,4 +127,70 @@ public class TestPathHierarchyTokenizer extends BaseTokenStreamTestCase {
|
|||
new int[]{1, 0, 0, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testBasicSkip() throws Exception {
|
||||
String path = "/a/b/c";
|
||||
PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"/b", "/b/c"},
|
||||
new int[]{2, 2},
|
||||
new int[]{4, 6},
|
||||
new int[]{1, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testEndOfDelimiterSkip() throws Exception {
|
||||
String path = "/a/b/c/";
|
||||
PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"/b", "/b/c", "/b/c/"},
|
||||
new int[]{2, 2, 2},
|
||||
new int[]{4, 6, 7},
|
||||
new int[]{1, 0, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testStartOfCharSkip() throws Exception {
|
||||
String path = "a/b/c";
|
||||
PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"/b", "/b/c"},
|
||||
new int[]{1, 1},
|
||||
new int[]{3, 5},
|
||||
new int[]{1, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testStartOfCharEndOfDelimiterSkip() throws Exception {
|
||||
String path = "a/b/c/";
|
||||
PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"/b", "/b/c", "/b/c/"},
|
||||
new int[]{1, 1, 1},
|
||||
new int[]{3, 5, 6},
|
||||
new int[]{1, 0, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testOnlyDelimiterSkip() throws Exception {
|
||||
String path = "/";
|
||||
PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{},
|
||||
new int[]{},
|
||||
new int[]{},
|
||||
new int[]{},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testOnlyDelimitersSkip() throws Exception {
|
||||
String path = "//";
|
||||
PathHierarchyTokenizer t = new PathHierarchyTokenizer( new StringReader(path), 1 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"/"},
|
||||
new int[]{1},
|
||||
new int[]{2},
|
||||
new int[]{1},
|
||||
path.length());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,157 @@
|
|||
package org.apache.lucene.analysis.path;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
|
||||
public class TestReversePathHierarchyTokenizer extends BaseTokenStreamTestCase {
|
||||
|
||||
public void testBasicReverse() throws Exception {
|
||||
String path = "/a/b/c";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"/a/b/c", "a/b/c", "b/c", "c"},
|
||||
new int[]{0, 1, 3, 5},
|
||||
new int[]{6, 6, 6, 6},
|
||||
new int[]{1, 0, 0, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testEndOfDelimiterReverse() throws Exception {
|
||||
String path = "/a/b/c/";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"/a/b/c/", "a/b/c/", "b/c/", "c/"},
|
||||
new int[]{0, 1, 3, 5},
|
||||
new int[]{7, 7, 7, 7},
|
||||
new int[]{1, 0, 0, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testStartOfCharReverse() throws Exception {
|
||||
String path = "a/b/c";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"a/b/c", "b/c", "c"},
|
||||
new int[]{0, 2, 4},
|
||||
new int[]{5, 5, 5},
|
||||
new int[]{1, 0, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testStartOfCharEndOfDelimiterReverse() throws Exception {
|
||||
String path = "a/b/c/";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"a/b/c/", "b/c/", "c/"},
|
||||
new int[]{0, 2, 4},
|
||||
new int[]{6, 6, 6},
|
||||
new int[]{1, 0, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testOnlyDelimiterReverse() throws Exception {
|
||||
String path = "/";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"/"},
|
||||
new int[]{0},
|
||||
new int[]{1},
|
||||
new int[]{1},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testOnlyDelimitersReverse() throws Exception {
|
||||
String path = "//";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path) );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"//", "/"},
|
||||
new int[]{0, 1},
|
||||
new int[]{2, 2},
|
||||
new int[]{1, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testEndOfDelimiterReverseSkip() throws Exception {
|
||||
String path = "/a/b/c/";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 1 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"/a/b/", "a/b/", "b/"},
|
||||
new int[]{0, 1, 3},
|
||||
new int[]{5, 5, 5},
|
||||
new int[]{1, 0, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testStartOfCharReverseSkip() throws Exception {
|
||||
String path = "a/b/c";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 1 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"a/b/", "b/"},
|
||||
new int[]{0, 2},
|
||||
new int[]{4, 4},
|
||||
new int[]{1, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testStartOfCharEndOfDelimiterReverseSkip() throws Exception {
|
||||
String path = "a/b/c/";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 1 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"a/b/", "b/"},
|
||||
new int[]{0, 2},
|
||||
new int[]{4, 4},
|
||||
new int[]{1, 0},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testOnlyDelimiterReverseSkip() throws Exception {
|
||||
String path = "/";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 1 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{},
|
||||
new int[]{},
|
||||
new int[]{},
|
||||
new int[]{},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testOnlyDelimitersReverseSkip() throws Exception {
|
||||
String path = "//";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 1 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"/"},
|
||||
new int[]{0},
|
||||
new int[]{1},
|
||||
new int[]{1},
|
||||
path.length());
|
||||
}
|
||||
|
||||
public void testReverseSkip2() throws Exception {
|
||||
String path = "/a/b/c/";
|
||||
ReversePathHierarchyTokenizer t = new ReversePathHierarchyTokenizer( new StringReader(path), 2 );
|
||||
assertTokenStreamContents(t,
|
||||
new String[]{"/a/", "a/"},
|
||||
new int[]{0, 1},
|
||||
new int[]{3, 3},
|
||||
new int[]{1, 0},
|
||||
path.length());
|
||||
}
|
||||
}
|
|
@ -21,6 +21,7 @@ import java.util.Map;
|
|||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
|
||||
import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -37,6 +38,8 @@ public class PathHierarchyTokenizerFactory extends BaseTokenizerFactory {
|
|||
|
||||
private char delimiter;
|
||||
private char replacement;
|
||||
private boolean reverse = false;
|
||||
private int skip = PathHierarchyTokenizer.DEFAULT_SKIP;
|
||||
|
||||
/**
|
||||
* Require a configured pattern
|
||||
|
@ -70,10 +73,23 @@ public class PathHierarchyTokenizerFactory extends BaseTokenizerFactory {
|
|||
else{
|
||||
replacement = delimiter;
|
||||
}
|
||||
|
||||
v = args.get( "reverse" );
|
||||
if( v != null ){
|
||||
reverse = "true".equals( v );
|
||||
}
|
||||
|
||||
v = args.get( "skip" );
|
||||
if( v != null ){
|
||||
skip = Integer.parseInt( v );
|
||||
}
|
||||
}
|
||||
|
||||
public Tokenizer create(Reader input) {
|
||||
return new PathHierarchyTokenizer(input, delimiter, replacement);
|
||||
if( reverse ) {
|
||||
return new ReversePathHierarchyTokenizer(input, delimiter, replacement, skip);
|
||||
}
|
||||
return new PathHierarchyTokenizer(input, delimiter, replacement, skip);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue