diff --git a/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java b/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java index a70a89efc08..1f287c3b4fd 100644 --- a/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java +++ b/lucene/core/src/java/org/apache/lucene/util/compress/LowercaseAsciiCompression.java @@ -111,14 +111,13 @@ public final class LowercaseAsciiCompression { numExceptions2++; } } + + // TODO: shouldn't this really be an assert instead? but then this real "if" triggered + // LUCENE-10551 so maybe it should remain a real "if": + if (numExceptions != numExceptions2) { throw new IllegalStateException( - "" - + numExceptions - + " <> " - + numExceptions2 - + " " - + new BytesRef(in, 0, len).utf8ToString()); + "" + numExceptions + " <> " + numExceptions2 + " " + new BytesRef(in, 0, len)); } } diff --git a/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java b/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java index fd8374575e8..3c18d0334fb 100644 --- a/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java +++ b/lucene/core/src/test/org/apache/lucene/util/compress/TestLowercaseAsciiCompression.java @@ -17,6 +17,7 @@ package org.apache.lucene.util.compress; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.stream.Collectors; import java.util.stream.IntStream; import org.apache.lucene.store.ByteBuffersDataOutput; @@ -47,20 +48,35 @@ public class TestLowercaseAsciiCompression extends LuceneTestCase { } public void testSimple() throws Exception { - assertFalse(doTestCompress("".getBytes("UTF-8"))); // too short - assertFalse(doTestCompress("ab1".getBytes("UTF-8"))); // too short - assertFalse(doTestCompress("ab1cdef".getBytes("UTF-8"))); // too short - assertTrue(doTestCompress("ab1cdefg".getBytes("UTF-8"))); - assertFalse(doTestCompress("ab1cdEfg".getBytes("UTF-8"))); // too many exceptions - assertTrue(doTestCompress("ab1cdefg".getBytes("UTF-8"))); + assertFalse(doTestCompress("".getBytes(StandardCharsets.UTF_8))); // too short + assertFalse(doTestCompress("ab1".getBytes(StandardCharsets.UTF_8))); // too short + assertFalse(doTestCompress("ab1cdef".getBytes(StandardCharsets.UTF_8))); // too short + assertTrue(doTestCompress("ab1cdefg".getBytes(StandardCharsets.UTF_8))); + assertFalse(doTestCompress("ab1cdEfg".getBytes(StandardCharsets.UTF_8))); // too many exceptions + assertTrue(doTestCompress("ab1cdefg".getBytes(StandardCharsets.UTF_8))); // 1 exception, but enough chars to be worth encoding an exception - assertTrue(doTestCompress("ab1.dEfg427hiogchio:'nwm un!94twxz".getBytes("UTF-8"))); + assertTrue( + doTestCompress("ab1.dEfg427hiogchio:'nwm un!94twxz".getBytes(StandardCharsets.UTF_8))); + } + + // LUCENE-10551 + public void testNotReallySimple() throws Exception { + doTestCompress( + "cion1cion_desarrollociones_oraclecionesnaturacionesnatura2tedppsa-integrationdemotiontion cloud gen2tion instance - dev1tion instance - testtion-devbtion-instancetion-prdtion-promerication-qation064533tion535217tion697401tion761348tion892818tion_matrationcauto_simmonsintgic_testtioncloudprodictioncloudservicetiongateway10tioninstance-jtsundatamartprd??o" + .getBytes(StandardCharsets.UTF_8)); + } + + // LUCENE-10551 + public void testNotReallySimple2() throws Exception { + doTestCompress( + "analytics-platform-test/koala/cluster-tool:1.0-20220310151438.492,mesh_istio_examples-bookinfo-details-v1:1.16.2mesh_istio_examples-bookinfo-reviews-v3:1.16.2oce-clamav:1.0.219oce-tesseract:1.0.7oce-traefik:2.5.1oci-opensearch:1.2.4.8.103oda-digital-assistant-control-plane-train-pool-workflow-v6:22.02.14oke-coresvcs-k8s-dns-dnsmasq-nanny-amd64@sha256:41aa9160ceeaf712369ddb660d02e5ec06d1679965e6930351967c8cf5ed62d4oke-coresvcs-k8s-dns-kube-dns-amd64@sha256:2cf34b04106974952996c6ef1313f165ce65b4ad68a3051f51b1b8f91ba5f838oke-coresvcs-k8s-dns-sidecar-amd64@sha256:8a82c7288725cb4de9c7cd8d5a78279208e379f35751539b406077f9a3163dcdoke-coresvcs-node-problem-detector@sha256:9d54df11804a862c54276648702a45a6a0027a9d930a86becd69c34cc84bf510oke-coresvcs-oke-fluentd-lumberjack@sha256:5f3f10b187eb804ce4e84bc3672de1cf318c0f793f00dac01cd7da8beea8f269oke-etcd-operator@sha256:4353a2e5ef02bb0f6b046a8d6219b1af359a2c1141c358ff110e395f29d0bfc8oke-oke-hyperkube-amd64@sha256:3c734f46099400507f938090eb9a874338fa25cde425ac9409df4c885759752foke-public-busybox@sha256:4cee1979ba0bf7db9fc5d28fb7b798ca69ae95a47c5fecf46327720df4ff352doke-public-coredns@sha256:86f8cfc74497f04e181ab2e1d26d2fd8bd46c4b33ce24b55620efcdfcb214670oke-public-coredns@sha256:8cd974302f1f6108f6f31312f8181ae723b514e2022089cdcc3db10666c49228oke-public-etcd@sha256:b751e459bc2a8f079f6730dd8462671b253c7c8b0d0eb47c67888d5091c6bb77oke-public-etcd@sha256:d6a76200a6e9103681bc2cf7fefbcada0dd9372d52cf8964178d846b89959d14oke-public-etcd@sha256:fa056479342b45479ac74c58176ddad43687d5fc295375d705808f9dfb48439aoke-public-kube-proxy@sha256:93b2da69d03413671606e22294c59a69fe404088a5f6e74d6394a8641fdb899boke-public-tiller@sha256:c2eb6e580123622e1bc0ff3becae3a3a71ac36c98a2786d780590197839175e5osms/opcbuild-osms-agent-proxy-java:0.4.0-129rosms/opcbuild-osms-agent-proxy-nginx:0.4.0-129rosms/opcbuild-osms-ingestion-cert:0.4.0-129rscs-lcm/drift-detector:227scs-lcm/salt-state-sync:242streaming-alpine:30.10.183streaming-kafka:30.10.183vision-service-document-classification:1.1.55vision-service-image-classification:1.4.52" + .getBytes(StandardCharsets.UTF_8)); } public void testFarAwayExceptions() throws Exception { String s = "01W" + IntStream.range(0, 300).mapToObj(i -> "a").collect(Collectors.joining()) + "W."; - assertTrue(doTestCompress(s.getBytes("UTF-8"))); + assertTrue(doTestCompress(s.getBytes(StandardCharsets.UTF_8))); } public void testRandomAscii() throws IOException { @@ -118,4 +134,13 @@ public class TestLowercaseAsciiCompression extends LuceneTestCase { doTestCompress(bytes, len); } } + + public void testAsciiCompressionRandom2() throws IOException { + int iters = atLeast(1000); + for (int iter = 0; iter < iters; ++iter) { + doTestCompress( + TestUtil.randomSubString(random(), atLeast(400), random().nextBoolean()) + .getBytes(StandardCharsets.UTF_8)); + } + } }