mirror of https://github.com/apache/lucene.git
LUCENE-2847: Added ASL2 license to supplementary macros generator, and to the generated file, and set svn:eol-style to native for both of them.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1056014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fbfb07d904
commit
8d7d57abdc
|
@ -1,105 +1,125 @@
|
|||
ALetterSupp = (
|
||||
([\ud80d][\uDC00-\uDC2E])
|
||||
| ([\ud80c][\uDC00-\uDFFF])
|
||||
| ([\ud809][\uDC00-\uDC62])
|
||||
| ([\ud808][\uDC00-\uDF6E])
|
||||
| ([\ud81a][\uDC00-\uDE38])
|
||||
| ([\ud804][\uDC03-\uDC37\uDC83-\uDCAF])
|
||||
| ([\ud835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB])
|
||||
| ([\ud801][\uDC00-\uDC9D])
|
||||
| ([\ud800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5])
|
||||
| ([\ud803][\uDC00-\uDC48])
|
||||
| ([\ud802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72])
|
||||
)
|
||||
FormatSupp = (
|
||||
([\ud804][\uDCBD])
|
||||
| ([\ud834][\uDD73-\uDD7A])
|
||||
| ([\udb40][\uDC01\uDC20-\uDC7F])
|
||||
)
|
||||
ExtendSupp = (
|
||||
([\ud804][\uDC00-\uDC02\uDC38-\uDC46\uDC80-\uDC82\uDCB0-\uDCBA])
|
||||
| ([\ud834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44])
|
||||
| ([\ud800][\uDDFD])
|
||||
| ([\udb40][\uDD00-\uDDEF])
|
||||
| ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
|
||||
)
|
||||
NumericSupp = (
|
||||
([\ud804][\uDC66-\uDC6F])
|
||||
| ([\ud835][\uDFCE-\uDFFF])
|
||||
| ([\ud801][\uDCA0-\uDCA9])
|
||||
)
|
||||
KatakanaSupp = (
|
||||
([\ud82c][\uDC00])
|
||||
)
|
||||
MidLetterSupp = (
|
||||
[]
|
||||
)
|
||||
MidNumSupp = (
|
||||
[]
|
||||
)
|
||||
MidNumLetSupp = (
|
||||
[]
|
||||
)
|
||||
ExtendNumLetSupp = (
|
||||
[]
|
||||
)
|
||||
ExtendNumLetSupp = (
|
||||
[]
|
||||
)
|
||||
ComplexContextSupp = (
|
||||
[]
|
||||
)
|
||||
HanSupp = (
|
||||
([\ud87e][\uDC00-\uDE1D])
|
||||
| ([\ud86b][\uDC00-\uDFFF])
|
||||
| ([\ud86a][\uDC00-\uDFFF])
|
||||
| ([\ud869][\uDC00-\uDED6\uDF00-\uDFFF])
|
||||
| ([\ud868][\uDC00-\uDFFF])
|
||||
| ([\ud86e][\uDC00-\uDC1D])
|
||||
| ([\ud86d][\uDC00-\uDF34\uDF40-\uDFFF])
|
||||
| ([\ud86c][\uDC00-\uDFFF])
|
||||
| ([\ud863][\uDC00-\uDFFF])
|
||||
| ([\ud862][\uDC00-\uDFFF])
|
||||
| ([\ud861][\uDC00-\uDFFF])
|
||||
| ([\ud860][\uDC00-\uDFFF])
|
||||
| ([\ud867][\uDC00-\uDFFF])
|
||||
| ([\ud866][\uDC00-\uDFFF])
|
||||
| ([\ud865][\uDC00-\uDFFF])
|
||||
| ([\ud864][\uDC00-\uDFFF])
|
||||
| ([\ud858][\uDC00-\uDFFF])
|
||||
| ([\ud859][\uDC00-\uDFFF])
|
||||
| ([\ud85a][\uDC00-\uDFFF])
|
||||
| ([\ud85b][\uDC00-\uDFFF])
|
||||
| ([\ud85c][\uDC00-\uDFFF])
|
||||
| ([\ud85d][\uDC00-\uDFFF])
|
||||
| ([\ud85e][\uDC00-\uDFFF])
|
||||
| ([\ud85f][\uDC00-\uDFFF])
|
||||
| ([\ud850][\uDC00-\uDFFF])
|
||||
| ([\ud851][\uDC00-\uDFFF])
|
||||
| ([\ud852][\uDC00-\uDFFF])
|
||||
| ([\ud853][\uDC00-\uDFFF])
|
||||
| ([\ud854][\uDC00-\uDFFF])
|
||||
| ([\ud855][\uDC00-\uDFFF])
|
||||
| ([\ud856][\uDC00-\uDFFF])
|
||||
| ([\ud857][\uDC00-\uDFFF])
|
||||
| ([\ud849][\uDC00-\uDFFF])
|
||||
| ([\ud848][\uDC00-\uDFFF])
|
||||
| ([\ud84b][\uDC00-\uDFFF])
|
||||
| ([\ud84a][\uDC00-\uDFFF])
|
||||
| ([\ud84d][\uDC00-\uDFFF])
|
||||
| ([\ud84c][\uDC00-\uDFFF])
|
||||
| ([\ud84f][\uDC00-\uDFFF])
|
||||
| ([\ud84e][\uDC00-\uDFFF])
|
||||
| ([\ud841][\uDC00-\uDFFF])
|
||||
| ([\ud840][\uDC00-\uDFFF])
|
||||
| ([\ud843][\uDC00-\uDFFF])
|
||||
| ([\ud842][\uDC00-\uDFFF])
|
||||
| ([\ud845][\uDC00-\uDFFF])
|
||||
| ([\ud844][\uDC00-\uDFFF])
|
||||
| ([\ud847][\uDC00-\uDFFF])
|
||||
| ([\ud846][\uDC00-\uDFFF])
|
||||
)
|
||||
HiraganaSupp = (
|
||||
([\ud83c][\uDE00])
|
||||
| ([\ud82c][\uDC01])
|
||||
)
|
||||
/*
|
||||
* Copyright 2010 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Generated using ICU4J 4.6.0.0 on Thursday, January 6, 2011 7:02:52 PM UTC
|
||||
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
|
||||
|
||||
|
||||
ALetterSupp = (
|
||||
([\ud80d][\uDC00-\uDC2E])
|
||||
| ([\ud80c][\uDC00-\uDFFF])
|
||||
| ([\ud809][\uDC00-\uDC62])
|
||||
| ([\ud808][\uDC00-\uDF6E])
|
||||
| ([\ud81a][\uDC00-\uDE38])
|
||||
| ([\ud804][\uDC03-\uDC37\uDC83-\uDCAF])
|
||||
| ([\ud835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB])
|
||||
| ([\ud801][\uDC00-\uDC9D])
|
||||
| ([\ud800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5])
|
||||
| ([\ud803][\uDC00-\uDC48])
|
||||
| ([\ud802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72])
|
||||
)
|
||||
FormatSupp = (
|
||||
([\ud804][\uDCBD])
|
||||
| ([\ud834][\uDD73-\uDD7A])
|
||||
| ([\udb40][\uDC01\uDC20-\uDC7F])
|
||||
)
|
||||
ExtendSupp = (
|
||||
([\ud804][\uDC00-\uDC02\uDC38-\uDC46\uDC80-\uDC82\uDCB0-\uDCBA])
|
||||
| ([\ud834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44])
|
||||
| ([\ud800][\uDDFD])
|
||||
| ([\udb40][\uDD00-\uDDEF])
|
||||
| ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
|
||||
)
|
||||
NumericSupp = (
|
||||
([\ud804][\uDC66-\uDC6F])
|
||||
| ([\ud835][\uDFCE-\uDFFF])
|
||||
| ([\ud801][\uDCA0-\uDCA9])
|
||||
)
|
||||
KatakanaSupp = (
|
||||
([\ud82c][\uDC00])
|
||||
)
|
||||
MidLetterSupp = (
|
||||
[]
|
||||
)
|
||||
MidNumSupp = (
|
||||
[]
|
||||
)
|
||||
MidNumLetSupp = (
|
||||
[]
|
||||
)
|
||||
ExtendNumLetSupp = (
|
||||
[]
|
||||
)
|
||||
ExtendNumLetSupp = (
|
||||
[]
|
||||
)
|
||||
ComplexContextSupp = (
|
||||
[]
|
||||
)
|
||||
HanSupp = (
|
||||
([\ud87e][\uDC00-\uDE1D])
|
||||
| ([\ud86b][\uDC00-\uDFFF])
|
||||
| ([\ud86a][\uDC00-\uDFFF])
|
||||
| ([\ud869][\uDC00-\uDED6\uDF00-\uDFFF])
|
||||
| ([\ud868][\uDC00-\uDFFF])
|
||||
| ([\ud86e][\uDC00-\uDC1D])
|
||||
| ([\ud86d][\uDC00-\uDF34\uDF40-\uDFFF])
|
||||
| ([\ud86c][\uDC00-\uDFFF])
|
||||
| ([\ud863][\uDC00-\uDFFF])
|
||||
| ([\ud862][\uDC00-\uDFFF])
|
||||
| ([\ud861][\uDC00-\uDFFF])
|
||||
| ([\ud860][\uDC00-\uDFFF])
|
||||
| ([\ud867][\uDC00-\uDFFF])
|
||||
| ([\ud866][\uDC00-\uDFFF])
|
||||
| ([\ud865][\uDC00-\uDFFF])
|
||||
| ([\ud864][\uDC00-\uDFFF])
|
||||
| ([\ud858][\uDC00-\uDFFF])
|
||||
| ([\ud859][\uDC00-\uDFFF])
|
||||
| ([\ud85a][\uDC00-\uDFFF])
|
||||
| ([\ud85b][\uDC00-\uDFFF])
|
||||
| ([\ud85c][\uDC00-\uDFFF])
|
||||
| ([\ud85d][\uDC00-\uDFFF])
|
||||
| ([\ud85e][\uDC00-\uDFFF])
|
||||
| ([\ud85f][\uDC00-\uDFFF])
|
||||
| ([\ud850][\uDC00-\uDFFF])
|
||||
| ([\ud851][\uDC00-\uDFFF])
|
||||
| ([\ud852][\uDC00-\uDFFF])
|
||||
| ([\ud853][\uDC00-\uDFFF])
|
||||
| ([\ud854][\uDC00-\uDFFF])
|
||||
| ([\ud855][\uDC00-\uDFFF])
|
||||
| ([\ud856][\uDC00-\uDFFF])
|
||||
| ([\ud857][\uDC00-\uDFFF])
|
||||
| ([\ud849][\uDC00-\uDFFF])
|
||||
| ([\ud848][\uDC00-\uDFFF])
|
||||
| ([\ud84b][\uDC00-\uDFFF])
|
||||
| ([\ud84a][\uDC00-\uDFFF])
|
||||
| ([\ud84d][\uDC00-\uDFFF])
|
||||
| ([\ud84c][\uDC00-\uDFFF])
|
||||
| ([\ud84f][\uDC00-\uDFFF])
|
||||
| ([\ud84e][\uDC00-\uDFFF])
|
||||
| ([\ud841][\uDC00-\uDFFF])
|
||||
| ([\ud840][\uDC00-\uDFFF])
|
||||
| ([\ud843][\uDC00-\uDFFF])
|
||||
| ([\ud842][\uDC00-\uDFFF])
|
||||
| ([\ud845][\uDC00-\uDFFF])
|
||||
| ([\ud844][\uDC00-\uDFFF])
|
||||
| ([\ud847][\uDC00-\uDFFF])
|
||||
| ([\ud846][\uDC00-\uDFFF])
|
||||
)
|
||||
HiraganaSupp = (
|
||||
([\ud83c][\uDE00])
|
||||
| ([\ud82c][\uDC01])
|
||||
)
|
||||
|
|
|
@ -17,16 +17,46 @@ package org.apache.lucene.analysis.icu;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.text.DateFormat;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.TimeZone;
|
||||
|
||||
import com.ibm.icu.text.UnicodeSet;
|
||||
import com.ibm.icu.text.UnicodeSetIterator;
|
||||
import com.ibm.icu.util.VersionInfo;
|
||||
|
||||
/** Generates JFlex macros that augment JFlex's Unicode word-break support for characters above the BMP (supplementary characters). */
|
||||
public class GenerateJFlexSupplementaryMacros {
|
||||
/** Set built from the pattern covering U+0000 through U+FFFF, i.e. the Basic Multilingual Plane. */
private static final UnicodeSet BMP = new UnicodeSet("[\u0000-\uFFFF]");
|
||||
/** Platform line separator, read once from the "line.separator" system property. */
private static final String NL = System.getProperty("line.separator");
|
||||
/**
 * Date/time formatter using the FULL date and FULL time styles in the US locale.
 * Its time zone is switched to UTC by the static initializer below.
 */
private static final DateFormat DATE_FORMAT = DateFormat.getDateTimeInstance
|
||||
(DateFormat.FULL, DateFormat.FULL, Locale.US);
|
||||
// Format timestamps in UTC rather than in the JVM's default time zone.
static {
|
||||
DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("UTC"));
|
||||
}
|
||||
|
||||
/**
 * Apache License 2.0 header, laid out as a block comment.
 * Every line ends with {@link #NL}, and the closing comment line is followed
 * by one extra {@link #NL} so that a blank line separates it from what comes next.
 */
private static final String APACHE_LICENSE
|
||||
= "/*" + NL
|
||||
+ " * Copyright 2010 The Apache Software Foundation." + NL
|
||||
+ " *" + NL
|
||||
+ " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL
|
||||
+ " * you may not use this file except in compliance with the License." + NL
|
||||
+ " * You may obtain a copy of the License at" + NL
|
||||
+ " *" + NL
|
||||
+ " * http://www.apache.org/licenses/LICENSE-2.0" + NL
|
||||
+ " *" + NL
|
||||
+ " * Unless required by applicable law or agreed to in writing, software" + NL
|
||||
+ " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
|
||||
+ " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
|
||||
+ " * See the License for the specific language governing permissions and" + NL
|
||||
+ " * limitations under the License." + NL
|
||||
+ " */" + NL + NL;
|
||||
|
||||
|
||||
public static void main(String args[]) throws Exception {
|
||||
outputHeader();
|
||||
outputMacro("ALetterSupp", "[:WordBreak=ALetter:]");
|
||||
outputMacro("FormatSupp", "[:WordBreak=Format:]");
|
||||
outputMacro("ExtendSupp", "[:WordBreak=Extend:]");
|
||||
|
@ -42,6 +72,14 @@ public class GenerateJFlexSupplementaryMacros {
|
|||
outputMacro("HiraganaSupp", "[:Script=Hiragana:]");
|
||||
}
|
||||
|
||||
/**
 * Writes the file header to standard output: the Apache license comment,
 * a "// Generated using ICU4J VERSION on DATE" line (date formatted with
 * DATE_FORMAT), a "// by CLASSNAME" line naming this class, and finally
 * two line separators.
 */
static void outputHeader() {
|
||||
System.out.print(APACHE_LICENSE);
|
||||
System.out.print("// Generated using ICU4J " + VersionInfo.ICU_VERSION.toString() + " on ");
|
||||
System.out.println(DATE_FORMAT.format(new Date()));
|
||||
System.out.println("// by " + GenerateJFlexSupplementaryMacros.class.getName());
|
||||
System.out.print(NL + NL);
|
||||
}
|
||||
|
||||
// we have to carefully output the possibilities as compact utf-16
|
||||
// range expressions, or jflex will OOM!
|
||||
static void outputMacro(String name, String pattern) {
|
||||
|
|
Loading…
Reference in New Issue