mirror of https://github.com/apache/lucene.git
LUCENE-1947: Snowball package contains BSD licensed code with ASL header.
Also updated use of StringBuffer to StringBuilder, and added a bit of documentation about this. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@823445 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a52b711350
commit
1d62f1a3f0
|
@ -4,6 +4,12 @@ This project provides pre-compiled version of the Snowball stemmers
|
||||||
based on revision 500 of the Tartarus Snowball repository,
|
based on revision 500 of the Tartarus Snowball repository,
|
||||||
together with classes integrating them with the Lucene search engine.
|
together with classes integrating them with the Lucene search engine.
|
||||||
|
|
||||||
|
A few changes has been made to the static Snowball code and compiled stemmers:
|
||||||
|
|
||||||
|
* Class SnowballProgram is made abstract and contains new abstract method stem() to avoid reflection in Lucene filter class SnowballFilter.
|
||||||
|
* All use of StringBuffers has been refactored to StringBuilder for speed.
|
||||||
|
* Snowball BSD license header has been added to the Java classes to avoid having RAT adding new ASL headers.
|
||||||
|
|
||||||
|
|
||||||
IMPORTANT NOTICE ON BACKWARDS COMPATIBILITY!
|
IMPORTANT NOTICE ON BACKWARDS COMPATIBILITY!
|
||||||
|
|
||||||
|
|
|
@ -1,22 +1,36 @@
|
||||||
package org.tartarus.snowball;
|
/*
|
||||||
|
|
||||||
|
Copyright (c) 2001, Dr Martin Porter
|
||||||
|
Copyright (c) 2002, Richard Boulton
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holders nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
package org.tartarus.snowball;
|
||||||
|
|
||||||
import java.lang.reflect.Method;
|
import java.lang.reflect.Method;
|
||||||
|
|
||||||
public class Among {
|
public class Among {
|
||||||
|
|
|
@ -1,34 +1,49 @@
|
||||||
package org.tartarus.snowball;
|
/*
|
||||||
|
|
||||||
|
Copyright (c) 2001, Dr Martin Porter
|
||||||
|
Copyright (c) 2002, Richard Boulton
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holders nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
package org.tartarus.snowball;
|
||||||
|
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is the rev 500 of the Snowball SVN trunk,
|
* This is the rev 500 of the Snowball SVN trunk,
|
||||||
* but modified:
|
* but modified:
|
||||||
* made abstract and introduced abstract method stem
|
* made abstract and introduced abstract method stem to avoid expensive reflection in filter class.
|
||||||
* to avoid expensive
|
* refactored StringBuffers to StringBuilder
|
||||||
*/
|
*/
|
||||||
public abstract class SnowballProgram {
|
public abstract class SnowballProgram {
|
||||||
protected SnowballProgram()
|
protected SnowballProgram()
|
||||||
{
|
{
|
||||||
current = new StringBuffer();
|
current = new StringBuilder();
|
||||||
setCurrent("");
|
setCurrent("");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,12 +74,12 @@ public abstract class SnowballProgram {
|
||||||
// the buffer size will not decrease, and we will risk wasting a large
|
// the buffer size will not decrease, and we will risk wasting a large
|
||||||
// amount of memory.
|
// amount of memory.
|
||||||
// Thanks to Wolfram Esser for spotting this problem.
|
// Thanks to Wolfram Esser for spotting this problem.
|
||||||
current = new StringBuffer();
|
current = new StringBuilder();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
// current string
|
// current string
|
||||||
protected StringBuffer current;
|
protected StringBuilder current;
|
||||||
|
|
||||||
protected int cursor;
|
protected int cursor;
|
||||||
protected int limit;
|
protected int limit;
|
||||||
|
@ -194,12 +209,12 @@ public abstract class SnowballProgram {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean eq_v(StringBuffer s)
|
protected boolean eq_v(StringBuilder s)
|
||||||
{
|
{
|
||||||
return eq_s(s.length(), s.toString());
|
return eq_s(s.length(), s.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
protected boolean eq_v_b(StringBuffer s)
|
protected boolean eq_v_b(StringBuilder s)
|
||||||
{ return eq_s_b(s.length(), s.toString());
|
{ return eq_s_b(s.length(), s.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -380,7 +395,7 @@ public abstract class SnowballProgram {
|
||||||
replace_s(bra, ket, s);
|
replace_s(bra, ket, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void slice_from(StringBuffer s)
|
protected void slice_from(StringBuilder s)
|
||||||
{
|
{
|
||||||
slice_from(s.toString());
|
slice_from(s.toString());
|
||||||
}
|
}
|
||||||
|
@ -397,13 +412,13 @@ public abstract class SnowballProgram {
|
||||||
if (c_bra <= ket) ket += adjustment;
|
if (c_bra <= ket) ket += adjustment;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void insert(int c_bra, int c_ket, StringBuffer s)
|
protected void insert(int c_bra, int c_ket, StringBuilder s)
|
||||||
{
|
{
|
||||||
insert(c_bra, c_ket, s.toString());
|
insert(c_bra, c_ket, s.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Copy the slice into the supplied StringBuffer */
|
/* Copy the slice into the supplied StringBuffer */
|
||||||
protected StringBuffer slice_to(StringBuffer s)
|
protected StringBuilder slice_to(StringBuilder s)
|
||||||
{
|
{
|
||||||
slice_check();
|
slice_check();
|
||||||
int len = ket - bra;
|
int len = ket - bra;
|
||||||
|
@ -411,7 +426,7 @@ public abstract class SnowballProgram {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected StringBuffer assign_to(StringBuffer s)
|
protected StringBuilder assign_to(StringBuilder s)
|
||||||
{
|
{
|
||||||
s.replace(0, s.length(), current.substring(0, limit));
|
s.replace(0, s.length(), current.substring(0, limit));
|
||||||
return s;
|
return s;
|
||||||
|
|
|
@ -1,22 +1,36 @@
|
||||||
package org.tartarus.snowball;
|
/*
|
||||||
|
|
||||||
|
Copyright (c) 2001, Dr Martin Porter
|
||||||
|
Copyright (c) 2002, Richard Boulton
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
* this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of the copyright holders nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||||
|
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
/**
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
package org.tartarus.snowball;
|
||||||
|
|
||||||
import java.lang.reflect.Method;
|
import java.lang.reflect.Method;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.io.Writer;
|
import java.io.Writer;
|
||||||
|
|
|
@ -65,7 +65,7 @@ public class DanishStemmer extends SnowballProgram {
|
||||||
|
|
||||||
private int I_x;
|
private int I_x;
|
||||||
private int I_p1;
|
private int I_p1;
|
||||||
private StringBuffer S_ch = new StringBuffer();
|
private StringBuilder S_ch = new StringBuilder();
|
||||||
|
|
||||||
private void copy_from(DanishStemmer other) {
|
private void copy_from(DanishStemmer other) {
|
||||||
I_x = other.I_x;
|
I_x = other.I_x;
|
||||||
|
|
|
@ -136,7 +136,7 @@ public class FinnishStemmer extends SnowballProgram {
|
||||||
private static final char g_particle_end[] = {17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };
|
private static final char g_particle_end[] = {17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };
|
||||||
|
|
||||||
private boolean B_ending_removed;
|
private boolean B_ending_removed;
|
||||||
private StringBuffer S_x = new StringBuffer();
|
private StringBuilder S_x = new StringBuilder();
|
||||||
private int I_p2;
|
private int I_p2;
|
||||||
private int I_p1;
|
private int I_p1;
|
||||||
|
|
||||||
|
|
|
@ -125,7 +125,7 @@ public class KpStemmer extends SnowballProgram {
|
||||||
private int I_p2;
|
private int I_p2;
|
||||||
private int I_p1;
|
private int I_p1;
|
||||||
private int I_x;
|
private int I_x;
|
||||||
private StringBuffer S_ch = new StringBuffer();
|
private StringBuilder S_ch = new StringBuilder();
|
||||||
|
|
||||||
private void copy_from(KpStemmer other) {
|
private void copy_from(KpStemmer other) {
|
||||||
B_GE_removed = other.B_GE_removed;
|
B_GE_removed = other.B_GE_removed;
|
||||||
|
|
|
@ -26,6 +26,14 @@ based on revision 500 of the Tartarus Snowball repository,
|
||||||
together with classes integrating them with the Lucene search engine.
|
together with classes integrating them with the Lucene search engine.
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
|
A few changes has been made to the static Snowball code and compiled stemmers:
|
||||||
|
</p>
|
||||||
|
<ul>
|
||||||
|
<li>Class SnowballProgram is made abstract and contains new abstract method stem() to avoid reflection in Lucene filter class SnowballFilter.</li>
|
||||||
|
<li>All use of StringBuffers has been refactored to StringBuilder for speed.</li>
|
||||||
|
<li>Snowball BSD license header has been added to the Java classes to avoid having RAT adding ASL headers.</li>
|
||||||
|
</ul>
|
||||||
|
<p>
|
||||||
See the Snowball <a href ="http://snowball.tartarus.org/">home page</a> for more information about the algorithms.
|
See the Snowball <a href ="http://snowball.tartarus.org/">home page</a> for more information about the algorithms.
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue