LUCENE-9684: Hunspell: support COMPOUNDRULE (#2228)

This commit is contained in:
Peter Gromov 2021-01-22 12:01:53 +01:00 committed by GitHub
parent cf5db8d651
commit d7968130c3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
40 changed files with 730 additions and 29 deletions

View File

@ -86,8 +86,8 @@ API Changes
Improvements Improvements
* LUCENE-9665 LUCENE-9676 LUCENE-9667 : Hunspell improvements: add SpellChecker API, support default encoding and * LUCENE-9687: Hunspell support improvements: add SpellChecker API, support default encoding and
BREAK/FORBIDDENWORD affix rules, improve stemming of all-caps words (Peter Gromov) BREAK/FORBIDDENWORD/COMPOUNDRULE affix rules, improve stemming of all-caps words (Peter Gromov)
* LUCENE-9633: Improve match highlighter behavior for degenerate intervals (on non-existing positions). * LUCENE-9633: Improve match highlighter behavior for degenerate intervals (on non-existing positions).
(Dawid Weiss) (Dawid Weiss)

View File

@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.hunspell;
import java.util.List;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/**
 * One parsed COMPOUNDRULE pattern. The pattern is stored as a flat sequence of flag characters,
 * where a flag may be immediately followed by a {@code *} or {@code ?} quantifier character. It is
 * matched against a list of dictionary entries, one entry per compound part.
 */
class CompoundRule {
  /** Parsed flags interleaved with the quantifier characters that follow them. */
  private final char[] data;

  private final Dictionary dictionary;

  /**
   * Parses a raw rule. Text outside parentheses and text inside each {@code (...)} group is handed
   * to the dictionary's flag parsing strategy; a {@code ?} or {@code *} directly after a closing
   * parenthesis is copied into the pattern unchanged.
   *
   * @param rule the rule text as written in the affix file
   * @param dictionary the dictionary supplying the flag parsing strategy and flag lookups
   * @throws IllegalArgumentException if an opening parenthesis has no closing counterpart
   */
  CompoundRule(String rule, Dictionary dictionary) {
    this.dictionary = dictionary;
    StringBuilder pattern = new StringBuilder();
    int cursor = 0;
    while (cursor < rule.length()) {
      int open = rule.indexOf('(', cursor);
      if (open < 0) {
        // no more groups: the remainder is parsed as one run of flags
        pattern.append(parse(rule.substring(cursor)));
        break;
      }
      pattern.append(parse(rule.substring(cursor, open)));

      int close = rule.indexOf(')', open + 1);
      if (close < 0) {
        throw new IllegalArgumentException("Unmatched parentheses: " + rule);
      }
      pattern.append(parse(rule.substring(open + 1, close)));

      cursor = close + 1;
      if (cursor < rule.length()) {
        char next = rule.charAt(cursor);
        if (next == '?' || next == '*') {
          pattern.append(next);
          cursor++;
        }
      }
    }
    data = pattern.toString().toCharArray();
  }

  /** Converts a raw flag string using the dictionary's flag parsing strategy. */
  private char[] parse(String rawFlags) {
    return dictionary.flagParsingStrategy.parseFlags(rawFlags);
  }

  /**
   * Lenient check: succeeds as soon as the words run out without contradicting the pattern, so the
   * given words may be just the beginning of a longer match.
   */
  boolean mayMatch(List<IntsRef> words, BytesRef scratch) {
    return match(words, 0, 0, scratch, false);
  }

  /** Strict check: the pattern and the word list must both be consumed completely. */
  boolean fullyMatches(List<IntsRef> words, BytesRef scratch) {
    return match(words, 0, 0, scratch, true);
  }

  /**
   * Recursive backtracking matcher.
   *
   * @param patternIndex position in {@link #data} of the flag to match next
   * @param wordIndex position in {@code words} of the entry to match next
   * @param fully whether running out of words before the pattern ends still needs the rest of the
   *     pattern to match
   */
  private boolean match(
      List<IntsRef> words, int patternIndex, int wordIndex, BytesRef scratch, boolean fully) {
    int wordCount = words.size();
    if (patternIndex >= data.length) {
      return wordIndex >= wordCount;
    }
    if (!fully && wordIndex >= wordCount) {
      return true;
    }

    char flag = data[patternIndex];
    boolean hasFollower = patternIndex + 1 < data.length;
    int afterQuantifier = patternIndex + 2;

    if (hasFollower && data[patternIndex + 1] == '*') {
      // consume greedily, then give words back one at a time until the rest of the pattern fits
      int end = wordIndex;
      while (end < wordCount && dictionary.hasFlag(words.get(end), flag, scratch)) {
        end++;
      }
      for (int resume = end; resume >= wordIndex; resume--) {
        if (match(words, afterQuantifier, resume, scratch, fully)) {
          return true;
        }
      }
      return false;
    }

    boolean currentWordMatches =
        wordIndex < wordCount && dictionary.hasFlag(words.get(wordIndex), flag, scratch);

    if (hasFollower && data[patternIndex + 1] == '?') {
      // optional flag: first try to use the current word, then try to skip the flag
      return (currentWordMatches && match(words, afterQuantifier, wordIndex + 1, scratch, fully))
          || match(words, afterQuantifier, wordIndex, scratch, fully);
    }

    return currentWordMatches && match(words, patternIndex + 1, wordIndex + 1, scratch, fully);
  }

  @Override
  public String toString() {
    return String.valueOf(data);
  }
}

View File

@ -92,6 +92,8 @@ public class Dictionary {
private static final String LANG_KEY = "LANG"; private static final String LANG_KEY = "LANG";
private static final String BREAK_KEY = "BREAK"; private static final String BREAK_KEY = "BREAK";
private static final String FORBIDDENWORD_KEY = "FORBIDDENWORD"; private static final String FORBIDDENWORD_KEY = "FORBIDDENWORD";
private static final String COMPOUNDMIN_KEY = "COMPOUNDMIN";
private static final String COMPOUNDRULE_KEY = "COMPOUNDRULE";
private static final String KEEPCASE_KEY = "KEEPCASE"; private static final String KEEPCASE_KEY = "KEEPCASE";
private static final String NEEDAFFIX_KEY = "NEEDAFFIX"; private static final String NEEDAFFIX_KEY = "NEEDAFFIX";
private static final String PSEUDOROOT_KEY = "PSEUDOROOT"; private static final String PSEUDOROOT_KEY = "PSEUDOROOT";
@ -136,7 +138,7 @@ public class Dictionary {
static final int AFFIX_APPEND = 3; static final int AFFIX_APPEND = 3;
// Default flag parsing strategy // Default flag parsing strategy
private FlagParsingStrategy flagParsingStrategy = new SimpleFlagParsingStrategy(); FlagParsingStrategy flagParsingStrategy = new SimpleFlagParsingStrategy();
// AF entries // AF entries
private String[] aliases; private String[] aliases;
@ -163,6 +165,8 @@ public class Dictionary {
int needaffix = -1; // needaffix flag, or -1 if one is not defined int needaffix = -1; // needaffix flag, or -1 if one is not defined
int forbiddenword = -1; // forbiddenword flag, or -1 if one is not defined int forbiddenword = -1; // forbiddenword flag, or -1 if one is not defined
int onlyincompound = -1; // onlyincompound flag, or -1 if one is not defined int onlyincompound = -1; // onlyincompound flag, or -1 if one is not defined
int compoundMin = 3;
List<CompoundRule> compoundRules; // nullable
// ignored characters (dictionary, affix, inputs) // ignored characters (dictionary, affix, inputs)
private char[] ignore; private char[] ignore;
@ -419,6 +423,18 @@ public class Dictionary {
throw new ParseException("Illegal FORBIDDENWORD declaration", reader.getLineNumber()); throw new ParseException("Illegal FORBIDDENWORD declaration", reader.getLineNumber());
} }
forbiddenword = flagParsingStrategy.parseFlag(parts[1]); forbiddenword = flagParsingStrategy.parseFlag(parts[1]);
} else if (line.startsWith(COMPOUNDMIN_KEY)) {
String[] parts = line.split("\\s+");
if (parts.length != 2) {
throw new ParseException("Illegal COMPOUNDMIN declaration", reader.getLineNumber());
}
compoundMin = Math.max(1, Integer.parseInt(parts[1]));
} else if (line.startsWith(COMPOUNDRULE_KEY)) {
String[] parts = line.split("\\s+");
if (parts.length != 2) {
throw new ParseException("Illegal COMPOUNDRULE header", reader.getLineNumber());
}
this.compoundRules = parseCompoundRules(reader, Integer.parseInt(parts[1]));
} }
} }
@ -442,6 +458,21 @@ public class Dictionary {
stripOffsets[currentIndex] = currentOffset; stripOffsets[currentIndex] = currentOffset;
} }
/**
 * Reads the rule lines that follow a COMPOUNDRULE header.
 *
 * @param reader reader positioned right after the header line
 * @param num number of rule lines announced by the header
 * @return the parsed rules, in file order
 * @throws IOException if reading from {@code reader} fails
 * @throws ParseException if the input ends before {@code num} rules were read, or a line does not
 *     start with the COMPOUNDRULE key followed by exactly one argument
 */
private List<CompoundRule> parseCompoundRules(LineNumberReader reader, int num)
    throws IOException, ParseException {
  List<CompoundRule> compoundRules = new ArrayList<>();
  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() returns null at end of input: the header announced more rules than exist.
      // Report it as a parse error instead of failing with a NullPointerException below.
      throw new ParseException("COMPOUNDRULE rule expected", reader.getLineNumber());
    }
    String[] parts = line.split("\\s+");
    if (!line.startsWith(COMPOUNDRULE_KEY) || parts.length != 2) {
      throw new ParseException("COMPOUNDRULE rule expected", reader.getLineNumber());
    }
    compoundRules.add(new CompoundRule(parts[1], this));
  }
  return compoundRules;
}
private Breaks parseBreaks(LineNumberReader reader, String line) private Breaks parseBreaks(LineNumberReader reader, String line)
throws IOException, ParseException { throws IOException, ParseException {
Set<String> starting = new LinkedHashSet<>(); Set<String> starting = new LinkedHashSet<>();
@ -910,7 +941,7 @@ public class Dictionary {
reuse.append(caseFold(word.charAt(i))); reuse.append(caseFold(word.charAt(i)));
} }
reuse.append(FLAG_SEPARATOR); reuse.append(FLAG_SEPARATOR);
reuse.append(HIDDEN_FLAG); flagParsingStrategy.appendFlag(HIDDEN_FLAG, reuse);
reuse.append(afterSep, afterSep.charAt(0) == FLAG_SEPARATOR ? 1 : 0, afterSep.length()); reuse.append(afterSep, afterSep.charAt(0) == FLAG_SEPARATOR ? 1 : 0, afterSep.length());
writer.write(reuse.toString().getBytes(StandardCharsets.UTF_8)); writer.write(reuse.toString().getBytes(StandardCharsets.UTF_8));
} }
@ -1188,16 +1219,19 @@ public class Dictionary {
return null; return null;
} }
boolean isForbiddenWord(char[] word, BytesRef scratch) { boolean isForbiddenWord(char[] word, int length, BytesRef scratch) {
if (forbiddenword != -1) { if (forbiddenword != -1) {
IntsRef forms = lookupWord(word, 0, word.length); IntsRef forms = lookupWord(word, 0, length);
if (forms != null) { return forms != null && hasFlag(forms, (char) forbiddenword, scratch);
int formStep = formStep(); }
for (int i = 0; i < forms.length; i += formStep) { return false;
if (hasFlag(forms.ints[forms.offset + i], (char) forbiddenword, scratch)) { }
return true;
} boolean hasFlag(IntsRef forms, char flag, BytesRef scratch) {
} int formStep = formStep();
for (int i = 0; i < forms.length; i += formStep) {
if (hasFlag(forms.ints[forms.offset + i], flag, scratch)) {
return true;
} }
} }
return false; return false;
@ -1227,6 +1261,8 @@ public class Dictionary {
* @return Parsed flags * @return Parsed flags
*/ */
abstract char[] parseFlags(String rawFlags); abstract char[] parseFlags(String rawFlags);
abstract void appendFlag(char flag, StringBuilder to);
} }
/** /**
@ -1238,6 +1274,11 @@ public class Dictionary {
public char[] parseFlags(String rawFlags) { public char[] parseFlags(String rawFlags) {
return rawFlags.toCharArray(); return rawFlags.toCharArray();
} }
/** Serializes {@code flag} by appending it to {@code to} as a single character. */
@Override
void appendFlag(char flag, StringBuilder to) {
to.append(flag);
}
} }
/** /**
@ -1266,6 +1307,14 @@ public class Dictionary {
} }
return flags; return flags;
} }
/**
 * Serializes {@code flag} as its decimal numeric value. A comma is written first whenever
 * {@code to} already holds any characters.
 */
@Override
void appendFlag(char flag, StringBuilder to) {
  boolean builderIsEmpty = to.length() == 0;
  if (!builderIsEmpty) {
    to.append(",");
  }
  int numericValue = flag;
  to.append(numericValue);
}
} }
/** /**
@ -1300,6 +1349,16 @@ public class Dictionary {
builder.getChars(0, builder.length(), flags, 0); builder.getChars(0, builder.length(), flags, 0);
return flags; return flags;
} }
/**
 * Serializes {@code flag} as two characters: first the value of its upper eight bits, then the
 * value of its lower eight bits.
 */
@Override
void appendFlag(char flag, StringBuilder to) {
  char highByte = (char) (flag >> 8);
  char lowByte = (char) (flag & 0xff);
  to.append(highByte).append(lowByte);
}
}
/** Tells whether compound rules are present, i.e. whether {@code compoundRules} is non-null. */
boolean hasCompounding() {
return compoundRules != null;
} }
boolean hasFlag(int entryId, char flag, BytesRef scratch) { boolean hasFlag(int entryId, char flag, BytesRef scratch) {

View File

@ -16,7 +16,10 @@
*/ */
package org.apache.lucene.analysis.hunspell; package org.apache.lucene.analysis.hunspell;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
/** /**
* A spell checker based on Hunspell dictionaries. The objects of this class are not thread-safe * A spell checker based on Hunspell dictionaries. The objects of this class are not thread-safe
@ -37,26 +40,100 @@ public class SpellChecker {
public boolean spell(String word) { public boolean spell(String word) {
if (word.isEmpty()) return true; if (word.isEmpty()) return true;
char[] wordChars = word.toCharArray(); if (dictionary.needsInputCleaning) {
if (dictionary.isForbiddenWord(wordChars, scratch)) { word = dictionary.cleanInput(word, new StringBuilder()).toString();
return false;
} }
if (isNumber(word)) { if (isNumber(word)) {
return true; return true;
} }
if (!stemmer.stem(wordChars, word.length()).isEmpty()) { char[] wordChars = word.toCharArray();
if (checkWord(wordChars, wordChars.length, false)) {
return true; return true;
} }
if (dictionary.breaks.isNotEmpty() && !hasTooManyBreakOccurrences(word)) { WordCase wc = stemmer.caseOf(wordChars, wordChars.length);
if ((wc == WordCase.UPPER || wc == WordCase.TITLE) && checkCaseVariants(wordChars, wc)) {
return true;
}
if (dictionary.breaks.isNotEmpty()
&& !hasTooManyBreakOccurrences(word)
&& !dictionary.isForbiddenWord(wordChars, word.length(), scratch)) {
return tryBreaks(word); return tryBreaks(word);
} }
return false; return false;
} }
/**
 * Checks differently-cased variants of a word. For an all-upper-case word the title-folded variant
 * is tried first; after that the variant produced by {@code caseFoldLower} is tried (built from the
 * title-folded form when one was made, otherwise from the word itself).
 */
private boolean checkCaseVariants(char[] wordChars, WordCase wordCase) {
  int length = wordChars.length;
  char[] base = wordChars;
  if (wordCase == WordCase.UPPER) {
    base = stemmer.caseFoldTitle(wordChars, length);
    boolean titleVariantAccepted = checkWord(base, length, true);
    if (titleVariantAccepted) {
      return true;
    }
  }
  char[] lowered = stemmer.caseFoldLower(base, length);
  return checkWord(lowered, length, true);
}
/**
 * Checks one concrete spelling: rejected if the dictionary marks it forbidden, accepted if the
 * stemmer finds at least one stem, and otherwise accepted only if the dictionary has compound
 * rules and the word can be split into a matching compound.
 */
private boolean checkWord(char[] wordChars, int length, boolean caseVariant) {
  if (dictionary.isForbiddenWord(wordChars, length, scratch)) {
    return false;
  }
  boolean hasStem = !stemmer.doStem(wordChars, length, caseVariant).isEmpty();
  if (hasStem) {
    return true;
  }
  return dictionary.hasCompounding() && checkCompounds(wordChars, 0, length, new ArrayList<>());
}
/**
 * Tries every split position of {@code wordChars[offset, offset + length)} that leaves at least
 * {@code compoundMin} characters on both sides. When the left part is a dictionary word and some
 * compound rule may still match the parts collected so far, the remainder is tried as the last
 * part and then, recursively, as a further compound.
 *
 * @param words entries of the parts accepted so far; restored to its incoming content before
 *     returning {@code false}
 */
private boolean checkCompounds(char[] wordChars, int offset, int length, List<IntsRef> words) {
  if (words.size() >= 100) {
    return false; // cap on the number of collected parts
  }

  int minPartLength = dictionary.compoundMin;
  for (int breakPos = minPartLength; breakPos <= length - minPartLength; breakPos++) {
    IntsRef forms = dictionary.lookupWord(wordChars, offset, breakPos);
    if (forms == null) {
      continue;
    }

    words.add(forms);
    boolean promising =
        dictionary.compoundRules != null
            && dictionary.compoundRules.stream().anyMatch(rule -> rule.mayMatch(words, scratch));
    if (promising) {
      int restStart = offset + breakPos;
      int restLength = length - breakPos;
      if (checkLastCompoundPart(wordChars, restStart, restLength, words)
          || checkCompounds(wordChars, restStart, restLength, words)) {
        return true;
      }
    }
    words.remove(words.size() - 1);
  }
  return false;
}
/**
 * Treats {@code wordChars[start, start + length)} as the final compound part: it must be a
 * dictionary word, and with it appended some compound rule must fully match the list of parts.
 * {@code words} is left with the same content it had on entry.
 */
private boolean checkLastCompoundPart(
    char[] wordChars, int start, int length, List<IntsRef> words) {
  IntsRef lastPart = dictionary.lookupWord(wordChars, start, length);
  if (lastPart == null) {
    return false;
  }

  words.add(lastPart);
  boolean matched = false;
  if (dictionary.compoundRules != null) {
    matched =
        dictionary.compoundRules.stream().anyMatch(rule -> rule.fullyMatches(words, scratch));
  }
  words.remove(words.size() - 1);
  return matched;
}
private static boolean isNumber(String s) { private static boolean isNumber(String s) {
int i = 0; int i = 0;
while (i < s.length()) { while (i < s.length()) {

View File

@ -112,8 +112,8 @@ final class Stemmer {
private char[] titleBuffer = new char[8]; private char[] titleBuffer = new char[8];
/** returns EXACT_CASE,TITLE_CASE, or UPPER_CASE type for the word */ /** returns EXACT_CASE,TITLE_CASE, or UPPER_CASE type for the word */
private WordCase caseOf(char[] word, int length) { WordCase caseOf(char[] word, int length) {
if (dictionary.ignoreCase || length == 0 || !Character.isUpperCase(word[0])) { if (dictionary.ignoreCase || length == 0 || Character.isLowerCase(word[0])) {
return WordCase.MIXED; return WordCase.MIXED;
} }
@ -121,22 +121,24 @@ final class Stemmer {
} }
/** folds titlecase variant of word to titleBuffer */ /** folds titlecase variant of word to titleBuffer */
private void caseFoldTitle(char[] word, int length) { char[] caseFoldTitle(char[] word, int length) {
titleBuffer = ArrayUtil.grow(titleBuffer, length); titleBuffer = ArrayUtil.grow(titleBuffer, length);
System.arraycopy(word, 0, titleBuffer, 0, length); System.arraycopy(word, 0, titleBuffer, 0, length);
for (int i = 1; i < length; i++) { for (int i = 1; i < length; i++) {
titleBuffer[i] = dictionary.caseFold(titleBuffer[i]); titleBuffer[i] = dictionary.caseFold(titleBuffer[i]);
} }
return titleBuffer;
} }
/** folds lowercase variant of word (title cased) to lowerBuffer */ /** folds lowercase variant of word (title cased) to lowerBuffer */
private void caseFoldLower(char[] word, int length) { char[] caseFoldLower(char[] word, int length) {
lowerBuffer = ArrayUtil.grow(lowerBuffer, length); lowerBuffer = ArrayUtil.grow(lowerBuffer, length);
System.arraycopy(word, 0, lowerBuffer, 0, length); System.arraycopy(word, 0, lowerBuffer, 0, length);
lowerBuffer[0] = dictionary.caseFold(lowerBuffer[0]); lowerBuffer[0] = dictionary.caseFold(lowerBuffer[0]);
return lowerBuffer;
} }
private List<CharsRef> doStem(char[] word, int length, boolean caseVariant) { List<CharsRef> doStem(char[] word, int length, boolean caseVariant) {
List<CharsRef> stems = new ArrayList<>(); List<CharsRef> stems = new ArrayList<>();
IntsRef forms = dictionary.lookupWord(word, 0, length); IntsRef forms = dictionary.lookupWord(word, 0, length);
if (forms != null) { if (forms != null) {

View File

@ -23,7 +23,7 @@ enum WordCase {
MIXED; MIXED;
static WordCase caseOf(char[] word, int length) { static WordCase caseOf(char[] word, int length) {
boolean capitalized = Character.isUpperCase(word[0]); boolean startsWithLower = Character.isLowerCase(word[0]);
boolean seenUpper = false; boolean seenUpper = false;
boolean seenLower = false; boolean seenLower = false;
@ -34,11 +34,11 @@ enum WordCase {
if (seenUpper && seenLower) break; if (seenUpper && seenLower) break;
} }
return get(capitalized, seenUpper, seenLower); return get(startsWithLower, seenUpper, seenLower);
} }
static WordCase caseOf(CharSequence word, int length) { static WordCase caseOf(CharSequence word, int length) {
boolean capitalized = Character.isUpperCase(word.charAt(0)); boolean startsWithLower = Character.isLowerCase(word.charAt(0));
boolean seenUpper = false; boolean seenUpper = false;
boolean seenLower = false; boolean seenLower = false;
@ -49,11 +49,11 @@ enum WordCase {
if (seenUpper && seenLower) break; if (seenUpper && seenLower) break;
} }
return get(capitalized, seenUpper, seenLower); return get(startsWithLower, seenUpper, seenLower);
} }
private static WordCase get(boolean capitalized, boolean seenUpper, boolean seenLower) { private static WordCase get(boolean startsWithLower, boolean seenUpper, boolean seenLower) {
if (capitalized) { if (!startsWithLower) {
return !seenLower ? UPPER : !seenUpper ? TITLE : MIXED; return !seenLower ? UPPER : !seenUpper ? TITLE : MIXED;
} }
return seenUpper ? MIXED : LOWER; return seenUpper ? MIXED : LOWER;

View File

@ -43,6 +43,38 @@ public class SpellCheckerTest extends StemmerTestBase {
doTest("breakoff"); doTest("breakoff");
} }
// COMPOUNDRULE tests: each one passes a resource base name to doTest, which looks up
// "<name>.aff" as a class-path resource relative to this test class.
public void testCompoundrule() throws Exception {
doTest("compoundrule");
}
public void testCompoundrule2() throws Exception {
doTest("compoundrule2");
}
public void testCompoundrule3() throws Exception {
doTest("compoundrule3");
}
public void testCompoundrule4() throws Exception {
doTest("compoundrule4");
}
public void testCompoundrule5() throws Exception {
doTest("compoundrule5");
}
public void testCompoundrule6() throws Exception {
doTest("compoundrule6");
}
public void testCompoundrule7() throws Exception {
doTest("compoundrule7");
}
public void testCompoundrule8() throws Exception {
doTest("compoundrule8");
}
protected void doTest(String name) throws Exception { protected void doTest(String name) throws Exception {
InputStream affixStream = InputStream affixStream =
Objects.requireNonNull(getClass().getResourceAsStream(name + ".aff"), name); Objects.requireNonNull(getClass().getResourceAsStream(name + ".aff"), name);

View File

@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.text.ParseException; import java.text.ParseException;
import java.util.Random;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRef;
@ -33,6 +34,7 @@ import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.Outputs; import org.apache.lucene.util.fst.Outputs;
import org.apache.lucene.util.fst.Util; import org.apache.lucene.util.fst.Util;
import org.junit.Test;
public class TestDictionary extends LuceneTestCase { public class TestDictionary extends LuceneTestCase {
@ -268,6 +270,27 @@ public class TestDictionary extends LuceneTestCase {
assertNotNull(Dictionary.getFlagParsingStrategy("FLAG UTF-8")); assertNotNull(Dictionary.getFlagParsingStrategy("FLAG UTF-8"));
} }
/**
 * Round-trip check for flag serialization: a random flag array written with {@code appendFlag}
 * must be read back unchanged by {@code parseFlags} under each of the listed FLAG modes.
 */
@Test
public void testFlagSerialization() {
  Random rnd = random();
  char[] original = new char[rnd.nextInt(10)];
  for (int i = 0; i < original.length; i++) {
    original[i] = (char) rnd.nextInt(Character.MAX_VALUE);
  }
  String expected = new String(original);

  for (String flagLine : new String[] {"FLAG long", "FLAG UTF-8", "FLAG num"}) {
    Dictionary.FlagParsingStrategy strategy = Dictionary.getFlagParsingStrategy(flagLine);

    StringBuilder serialized = new StringBuilder();
    for (char flag : original) {
      strategy.appendFlag(flag, serialized);
    }

    char[] roundTripped = strategy.parseFlags(serialized.toString());
    assertEquals(expected, new String(roundTripped));
  }
}
private Directory getDirectory() { private Directory getDirectory() {
return newDirectory(); return newDirectory();
} }

View File

@ -0,0 +1,3 @@
COMPOUNDMIN 1
COMPOUNDRULE 1
COMPOUNDRULE ABC

View File

@ -0,0 +1,5 @@
3
a/A
b/B
c/BC

View File

@ -0,0 +1,39 @@
ba
aaabaaa
bbaaa
aaaaba
bbbbbaa
aa
aaa
aaaa
ab
aab
aaab
aaaab
abb
aabb
aaabbb
bb
bbb
bbbb
aaab
abcc
abbc
abbcc
aabc
aabcc
aabbc
aabbcc
aaabbbccc
ac
aac
aacc
aaaccc
bc
bcc
bbc
bbcc
bbbccc
cc
ccc
cccccc

View File

@ -0,0 +1,3 @@
COMPOUNDMIN 1
COMPOUNDRULE 1
COMPOUNDRULE A*B*C*

View File

@ -0,0 +1,37 @@
aa
aaa
aaaa
ab
aab
aaab
aaaab
abb
aabb
aaabbb
bb
bbb
bbbb
aaab
abc
abcc
abbc
abbcc
aabc
aabcc
aabbc
aabbcc
aaabbbccc
ac
acc
aac
aacc
aaaccc
bc
bcc
bbc
bbcc
bbbccc
cc
ccc
cccccc
abcc

View File

@ -0,0 +1,8 @@
ba
aaabaaa
bbaaa
aaaaba
bbbbbaa
cba
cab
acb

View File

@ -0,0 +1,3 @@
COMPOUNDMIN 1
COMPOUNDRULE 1
COMPOUNDRULE A?B?C?

View File

@ -0,0 +1,7 @@
a
b
c
ab
abc
ac
bc

View File

@ -0,0 +1,41 @@
aa
aaa
aaaa
aab
aaab
aaaab
abb
aabb
aaabbb
bb
bbb
bbbb
aaab
abcc
abbc
abbcc
aabc
aabcc
aabbc
aabbcc
aaabbbccc
acc
aac
aacc
aaaccc
bcc
bbc
bbcc
bbbccc
cc
ccc
cccccc
abcc
ba
aaabaaa
bbaaa
aaaaba
bbbbbaa
cba
cab
acb

View File

@ -0,0 +1,7 @@
# English ordinal numbers
WORDCHARS 0123456789
COMPOUNDMIN 1
ONLYINCOMPOUND c
COMPOUNDRULE 2
COMPOUNDRULE n*1t
COMPOUNDRULE n*mp

View File

@ -0,0 +1,24 @@
22
0/nm
1/n1
2/nm
3/nm
4/nm
5/nm
6/nm
7/nm
8/nm
9/nm
0th/pt
1st/p
1th/tc
2nd/p
2th/tc
3rd/p
3th/tc
4th/pt
5th/pt
6th/pt
7th/pt
8th/pt
9th/pt

View File

@ -0,0 +1,31 @@
1st
2nd
3rd
4th
5th
6th
7th
8th
9th
10th
11th
12th
13th
14th
15th
16th
17th
18th
19th
20th
21st
22nd
23rd
24th
25th
100th
1000th
10001st
10011th
1ST
42ND

View File

@ -0,0 +1,5 @@
1th
2th
3th
10001th
10011st

View File

@ -0,0 +1,7 @@
# number + percent
SET UTF-8
COMPOUNDMIN 1
COMPOUNDRULE 2
COMPOUNDRULE N*%?
COMPOUNDRULE NN*.NN*%?
WORDCHARS 0123456789‰.

View File

@ -0,0 +1,14 @@
13
0/N po:num
1/N po:num
2/N po:num
3/N po:num
4/N po:num
5/N po:num
6/N po:num
7/N po:num
8/N po:num
9/N po:num
./. po:sign_dot
%/% po:sign_percent
‰/% po:sign_per_mille

View File

@ -0,0 +1,7 @@
10%
0.2%
0.20%
123.4561‰
10
0000
10.25

View File

@ -0,0 +1,4 @@
COMPOUNDMIN 1
COMPOUNDRULE 2
COMPOUNDRULE A*A
COMPOUNDRULE A*AAB*BBBC*C

View File

@ -0,0 +1,4 @@
aa
aaaaaa
aabbbc
aaaaabbbbbbcccccc

View File

@ -0,0 +1,4 @@
abc
abbbbbccccccc
aabbccccccc
aabbbbbbb

View File

@ -0,0 +1,8 @@
# English ordinal numbers (parenthesized long flags)
FLAG long
WORDCHARS 0123456789
COMPOUNDMIN 1
ONLYINCOMPOUND cc
COMPOUNDRULE 2
COMPOUNDRULE (nn)*(11)(tt)
COMPOUNDRULE (nn)*(mm)(pp)

View File

@ -0,0 +1,24 @@
22
0/nnmm
1/nn11
2/nnmm
3/nnmm
4/nnmm
5/nnmm
6/nnmm
7/nnmm
8/nnmm
9/nnmm
0th/pptt
1st/pp
1th/ttcc
2nd/pp
2th/ttcc
3rd/pp
3th/ttcc
4th/pptt
5th/pptt
6th/pptt
7th/pptt
8th/pptt
9th/pptt

View File

@ -0,0 +1,29 @@
1st
2nd
3rd
4th
5th
6th
7th
8th
9th
10th
11th
12th
13th
14th
15th
16th
17th
18th
19th
20th
21st
22nd
23rd
24th
25th
100th
1000th
10001st
10011th

View File

@ -0,0 +1,5 @@
1th
2th
3th
10001th
10011st

View File

@ -0,0 +1,8 @@
# English ordinal numbers (parenthesized numerical flags)
FLAG num
WORDCHARS 0123456789
COMPOUNDMIN 1
ONLYINCOMPOUND 1000
COMPOUNDRULE 2
COMPOUNDRULE (1001)*(1002)(2001)
COMPOUNDRULE (1001)*(2002)(2000)

View File

@ -0,0 +1,24 @@
22
0/1001,2002
1/1001,1002
2/1001,2002
3/1001,2002
4/1001,2002
5/1001,2002
6/1001,2002
7/1001,2002
8/1001,2002
9/1001,2002
0th/2000,2001
1st/2000
1th/2001,1000
2nd/2000
2th/2001,1000
3rd/2000
3th/2001,1000
4th/2000,2001
5th/2000,2001
6th/2000,2001
7th/2000,2001
8th/2000,2001
9th/2000,2001

View File

@ -0,0 +1,29 @@
1st
2nd
3rd
4th
5th
6th
7th
8th
9th
10th
11th
12th
13th
14th
15th
16th
17th
18th
19th
20th
21st
22nd
23rd
24th
25th
100th
1000th
10001st
10011th

View File

@ -0,0 +1,5 @@
1th
2th
3th
10001th
10011st