mirror of https://github.com/apache/lucene.git
LUCENE-1947: Snowball package contains BSD licensed code with ASL header.
Also updated use of StringBuffer to StringBuilder, and added a bit of documentation about this. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@823445 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a52b711350
commit
1d62f1a3f0
|
@ -4,6 +4,12 @@ This project provides pre-compiled version of the Snowball stemmers
|
|||
based on revision 500 of the Tartarus Snowball repository,
|
||||
together with classes integrating them with the Lucene search engine.
|
||||
|
||||
A few changes has been made to the static Snowball code and compiled stemmers:
|
||||
|
||||
* Class SnowballProgram is made abstract and contains new abstract method stem() to avoid reflection in Lucene filter class SnowballFilter.
|
||||
* All use of StringBuffers has been refactored to StringBuilder for speed.
|
||||
* Snowball BSD license header has been added to the Java classes to avoid having RAT adding new ASL headers.
|
||||
|
||||
|
||||
IMPORTANT NOTICE ON BACKWARDS COMPATIBILITY!
|
||||
|
||||
|
|
|
@ -1,22 +1,36 @@
|
|||
package org.tartarus.snowball;
|
||||
/*
|
||||
|
||||
Copyright (c) 2001, Dr Martin Porter
|
||||
Copyright (c) 2002, Richard Boulton
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holders nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.tartarus.snowball;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
|
||||
public class Among {
|
||||
|
|
|
@ -1,34 +1,49 @@
|
|||
package org.tartarus.snowball;
|
||||
/*
|
||||
|
||||
Copyright (c) 2001, Dr Martin Porter
|
||||
Copyright (c) 2002, Richard Boulton
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holders nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
package org.tartarus.snowball;
|
||||
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
||||
/**
|
||||
* This is the rev 500 of the Snowball SVN trunk,
|
||||
* but modified:
|
||||
* made abstract and introduced abstract method stem
|
||||
* to avoid expensive
|
||||
* made abstract and introduced abstract method stem to avoid expensive reflection in filter class.
|
||||
* refactored StringBuffers to StringBuilder
|
||||
*/
|
||||
public abstract class SnowballProgram {
|
||||
protected SnowballProgram()
|
||||
{
|
||||
current = new StringBuffer();
|
||||
current = new StringBuilder();
|
||||
setCurrent("");
|
||||
}
|
||||
|
||||
|
@ -59,12 +74,12 @@ public abstract class SnowballProgram {
|
|||
// the buffer size will not decrease, and we will risk wasting a large
|
||||
// amount of memory.
|
||||
// Thanks to Wolfram Esser for spotting this problem.
|
||||
current = new StringBuffer();
|
||||
current = new StringBuilder();
|
||||
return result;
|
||||
}
|
||||
|
||||
// current string
|
||||
protected StringBuffer current;
|
||||
protected StringBuilder current;
|
||||
|
||||
protected int cursor;
|
||||
protected int limit;
|
||||
|
@ -194,12 +209,12 @@ public abstract class SnowballProgram {
|
|||
return true;
|
||||
}
|
||||
|
||||
protected boolean eq_v(StringBuffer s)
|
||||
protected boolean eq_v(StringBuilder s)
|
||||
{
|
||||
return eq_s(s.length(), s.toString());
|
||||
}
|
||||
|
||||
protected boolean eq_v_b(StringBuffer s)
|
||||
protected boolean eq_v_b(StringBuilder s)
|
||||
{ return eq_s_b(s.length(), s.toString());
|
||||
}
|
||||
|
||||
|
@ -380,7 +395,7 @@ public abstract class SnowballProgram {
|
|||
replace_s(bra, ket, s);
|
||||
}
|
||||
|
||||
protected void slice_from(StringBuffer s)
|
||||
protected void slice_from(StringBuilder s)
|
||||
{
|
||||
slice_from(s.toString());
|
||||
}
|
||||
|
@ -397,13 +412,13 @@ public abstract class SnowballProgram {
|
|||
if (c_bra <= ket) ket += adjustment;
|
||||
}
|
||||
|
||||
protected void insert(int c_bra, int c_ket, StringBuffer s)
|
||||
protected void insert(int c_bra, int c_ket, StringBuilder s)
|
||||
{
|
||||
insert(c_bra, c_ket, s.toString());
|
||||
}
|
||||
|
||||
/* Copy the slice into the supplied StringBuffer */
|
||||
protected StringBuffer slice_to(StringBuffer s)
|
||||
protected StringBuilder slice_to(StringBuilder s)
|
||||
{
|
||||
slice_check();
|
||||
int len = ket - bra;
|
||||
|
@ -411,7 +426,7 @@ public abstract class SnowballProgram {
|
|||
return s;
|
||||
}
|
||||
|
||||
protected StringBuffer assign_to(StringBuffer s)
|
||||
protected StringBuilder assign_to(StringBuilder s)
|
||||
{
|
||||
s.replace(0, s.length(), current.substring(0, limit));
|
||||
return s;
|
||||
|
|
|
@ -1,22 +1,36 @@
|
|||
package org.tartarus.snowball;
|
||||
/*
|
||||
|
||||
Copyright (c) 2001, Dr Martin Porter
|
||||
Copyright (c) 2002, Richard Boulton
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
* this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the copyright holders nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
|
||||
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.tartarus.snowball;
|
||||
|
||||
import java.lang.reflect.Method;
|
||||
import java.io.Reader;
|
||||
import java.io.Writer;
|
||||
|
|
|
@ -65,7 +65,7 @@ public class DanishStemmer extends SnowballProgram {
|
|||
|
||||
private int I_x;
|
||||
private int I_p1;
|
||||
private StringBuffer S_ch = new StringBuffer();
|
||||
private StringBuilder S_ch = new StringBuilder();
|
||||
|
||||
private void copy_from(DanishStemmer other) {
|
||||
I_x = other.I_x;
|
||||
|
|
|
@ -136,7 +136,7 @@ public class FinnishStemmer extends SnowballProgram {
|
|||
private static final char g_particle_end[] = {17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 };
|
||||
|
||||
private boolean B_ending_removed;
|
||||
private StringBuffer S_x = new StringBuffer();
|
||||
private StringBuilder S_x = new StringBuilder();
|
||||
private int I_p2;
|
||||
private int I_p1;
|
||||
|
||||
|
|
|
@ -125,7 +125,7 @@ public class KpStemmer extends SnowballProgram {
|
|||
private int I_p2;
|
||||
private int I_p1;
|
||||
private int I_x;
|
||||
private StringBuffer S_ch = new StringBuffer();
|
||||
private StringBuilder S_ch = new StringBuilder();
|
||||
|
||||
private void copy_from(KpStemmer other) {
|
||||
B_GE_removed = other.B_GE_removed;
|
||||
|
|
|
@ -26,6 +26,14 @@ based on revision 500 of the Tartarus Snowball repository,
|
|||
together with classes integrating them with the Lucene search engine.
|
||||
</p>
|
||||
<p>
|
||||
A few changes has been made to the static Snowball code and compiled stemmers:
|
||||
</p>
|
||||
<ul>
|
||||
<li>Class SnowballProgram is made abstract and contains new abstract method stem() to avoid reflection in Lucene filter class SnowballFilter.</li>
|
||||
<li>All use of StringBuffers has been refactored to StringBuilder for speed.</li>
|
||||
<li>Snowball BSD license header has been added to the Java classes to avoid having RAT adding ASL headers.</li>
|
||||
</ul>
|
||||
<p>
|
||||
See the Snowball <a href ="http://snowball.tartarus.org/">home page</a> for more information about the algorithms.
|
||||
</p>
|
||||
|
||||
|
|
Loading…
Reference in New Issue