From e875bd81435bc7627e4eab9a18490583d2cee604 Mon Sep 17 00:00:00 2001 From: Tim Allison Date: Tue, 7 Jan 2020 15:45:07 +0000 Subject: [PATCH] BUG 64015 -- swap out java.util.BitSet for zaxxer's SparseBitSet git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1872445 13f79535-47bb-0310-9956-ffa450edef68 --- build.gradle | 2 ++ build.xml | 8 ++++++ maven/poi.pom | 5 ++++ sonar/main/pom.xml | 5 ++++ .../org/apache/poi/ddf/EscherDggRecord.java | 4 +-- .../crypt/ChunkedCipherOutputStream.java | 10 +++---- .../poi/poifs/crypt/xor/XOREncryptor.java | 4 +-- .../poi/sl/extractor/SlideShowExtractor.java | 27 +++++++++++++++++++ .../poi/ss/format/CellNumberFormatter.java | 4 +-- .../openxml4j/opc/PackagePartCollection.java | 11 ++++++-- .../apache/poi/xslf/usermodel/XSLFSheet.java | 4 +-- .../poi/xssf/binary/XSSFBHyperlinksTable.java | 4 +-- .../apache/poi/xssf/binary/XSSFBParser.java | 6 ++--- .../poi/xssf/eventusermodel/XSSFBReader.java | 4 +-- 14 files changed, 76 insertions(+), 22 deletions(-) diff --git a/build.gradle b/build.gradle index 162048ab72..5c9487d40b 100644 --- a/build.gradle +++ b/build.gradle @@ -185,6 +185,7 @@ project('main') { compile 'com.sun.xml.bind:jaxb-impl:2.3.2' compile 'com.sun.xml.bind:jaxb-core:2.3.0.1' compile 'javax.activation:activation:1.1.1' + compile 'com.zaxxer:SparseBitSet:1.2' testCompile 'junit:junit:4.12' testCompile 'org.mockito:mockito-core:3.0.0' @@ -236,6 +237,7 @@ project('ooxml') { compile 'org.apache.santuario:xmlsec:2.1.2' compile 'org.bouncycastle:bcpkix-jdk15on:1.62' compile 'com.github.virtuald:curvesapi:1.06' + compile 'com.zaxxer:SparseBitSet:1.2' // compile only, don't add it to our dist as it blows up the size compile 'org.apache.xmlgraphics:batik-all:1.11' diff --git a/build.xml b/build.xml index e165daf01f..0ac0854150 100644 --- a/build.xml +++ b/build.xml @@ -218,6 +218,9 @@ under the License. + + @@ -378,6 +381,7 @@ under the License. + @@ -752,6 +756,7 @@ under the License. 
+ @@ -788,6 +793,7 @@ under the License. + @@ -2038,6 +2044,7 @@ under the License. + @@ -2320,6 +2327,7 @@ under the License. + diff --git a/maven/poi.pom b/maven/poi.pom index d0cdf9baf5..a3d15a8b0b 100644 --- a/maven/poi.pom +++ b/maven/poi.pom @@ -88,6 +88,11 @@ commons-math3 3.6.1 + + com.zaxxer + SparseBitSet + 1.2 + org.hamcrest diff --git a/sonar/main/pom.xml b/sonar/main/pom.xml index 23c9b961ab..5a4a8287b7 100644 --- a/sonar/main/pom.xml +++ b/sonar/main/pom.xml @@ -125,6 +125,11 @@ commons-codec 1.13 + + com.zaxxer + SparseBitSet + 1.2 + commons-logging commons-logging diff --git a/src/java/org/apache/poi/ddf/EscherDggRecord.java b/src/java/org/apache/poi/ddf/EscherDggRecord.java index f1e3cc75cc..b4705ad383 100644 --- a/src/java/org/apache/poi/ddf/EscherDggRecord.java +++ b/src/java/org/apache/poi/ddf/EscherDggRecord.java @@ -19,11 +19,11 @@ package org.apache.poi.ddf; import java.util.ArrayList; import java.util.Arrays; -import java.util.BitSet; import java.util.List; import java.util.Map; import java.util.function.Supplier; +import com.zaxxer.sparsebits.SparseBitSet; import org.apache.poi.common.usermodel.GenericRecord; import org.apache.poi.util.GenericRecordUtil; import org.apache.poi.util.LittleEndian; @@ -301,7 +301,7 @@ public final class EscherDggRecord extends EscherRecord { * @return the next available drawing group id */ public short findNewDrawingGroupId() { - BitSet bs = new BitSet(); + SparseBitSet bs = new SparseBitSet(); bs.set(0); for (FileIdCluster fic : field_5_fileIdClusters) { bs.set(fic.getDrawingGroupId()); diff --git a/src/java/org/apache/poi/poifs/crypt/ChunkedCipherOutputStream.java b/src/java/org/apache/poi/poifs/crypt/ChunkedCipherOutputStream.java index b98e229d8e..e00b1c2bac 100644 --- a/src/java/org/apache/poi/poifs/crypt/ChunkedCipherOutputStream.java +++ b/src/java/org/apache/poi/poifs/crypt/ChunkedCipherOutputStream.java @@ -25,13 +25,13 @@ import java.io.FilterOutputStream; import java.io.IOException; import 
java.io.OutputStream; import java.security.GeneralSecurityException; -import java.util.BitSet; import javax.crypto.BadPaddingException; import javax.crypto.Cipher; import javax.crypto.IllegalBlockSizeException; import javax.crypto.ShortBufferException; +import com.zaxxer.sparsebits.SparseBitSet; import org.apache.poi.EncryptedDocumentException; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.POIFSWriterEvent; @@ -56,7 +56,7 @@ public abstract class ChunkedCipherOutputStream extends FilterOutputStream { private final int chunkBits; private final byte[] chunk; - private final BitSet plainByteFlags; + private final SparseBitSet plainByteFlags; private final File fileOut; private final DirectoryNode dir; @@ -74,7 +74,7 @@ public abstract class ChunkedCipherOutputStream extends FilterOutputStream { this.chunkSize = chunkSize; int cs = chunkSize == STREAMING ? 4096 : chunkSize; this.chunk = IOUtils.safelyAllocate(cs, MAX_RECORD_LENGTH); - this.plainByteFlags = new BitSet(cs); + this.plainByteFlags = new SparseBitSet(cs); this.chunkBits = Integer.bitCount(cs-1); this.fileOut = TempFile.createTempFile("encrypted_package", "crypt"); this.fileOut.deleteOnExit(); @@ -88,7 +88,7 @@ public abstract class ChunkedCipherOutputStream extends FilterOutputStream { this.chunkSize = chunkSize; int cs = chunkSize == STREAMING ? 
4096 : chunkSize; this.chunk = IOUtils.safelyAllocate(cs, MAX_RECORD_LENGTH); - this.plainByteFlags = new BitSet(cs); + this.plainByteFlags = new SparseBitSet(cs); this.chunkBits = Integer.bitCount(cs-1); this.fileOut = null; this.dir = null; @@ -283,7 +283,7 @@ public abstract class ChunkedCipherOutputStream extends FilterOutputStream { return chunk; } - protected BitSet getPlainByteFlags() { + protected SparseBitSet getPlainByteFlags() { return plainByteFlags; } diff --git a/src/java/org/apache/poi/poifs/crypt/xor/XOREncryptor.java b/src/java/org/apache/poi/poifs/crypt/xor/XOREncryptor.java index 3b180b20d4..0c1cbf4932 100644 --- a/src/java/org/apache/poi/poifs/crypt/xor/XOREncryptor.java +++ b/src/java/org/apache/poi/poifs/crypt/xor/XOREncryptor.java @@ -21,11 +21,11 @@ import java.io.File; import java.io.IOException; import java.io.OutputStream; import java.security.GeneralSecurityException; -import java.util.BitSet; import javax.crypto.Cipher; import javax.crypto.spec.SecretKeySpec; +import com.zaxxer.sparsebits.SparseBitSet; import org.apache.poi.EncryptedDocumentException; import org.apache.poi.poifs.crypt.ChunkedCipherOutputStream; import org.apache.poi.poifs.crypt.CryptoFunctions; @@ -139,7 +139,7 @@ public class XOREncryptor extends Encryptor { final int start = Math.max(posInChunk-(recordEnd-recordStart), 0); - final BitSet plainBytes = getPlainByteFlags(); + final SparseBitSet plainBytes = getPlainByteFlags(); final byte[] xorArray = getEncryptionInfo().getEncryptor().getSecretKey().getEncoded(); final byte[] chunk = getChunk(); final byte[] plain = (plainBytes.isEmpty()) ? 
null : chunk.clone(); diff --git a/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java b/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java index 7173c24e97..b99994bc12 100644 --- a/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java +++ b/src/java/org/apache/poi/sl/extractor/SlideShowExtractor.java @@ -25,6 +25,7 @@ import java.util.function.Consumer; import java.util.function.Function; import java.util.function.Predicate; +import com.zaxxer.sparsebits.SparseBitSet; import org.apache.poi.extractor.POITextExtractor; import org.apache.poi.sl.usermodel.MasterSheet; import org.apache.poi.sl.usermodel.Notes; @@ -382,7 +383,9 @@ public class SlideShowExtractor< * @param bold use {@code true} for bold TextRuns, {@code false} for non-bold ones and * {@code null} if it doesn't matter * @return a bitset with the marked/used codepoints + * @deprecated use {@link #getCodepointsInSparseBitSet(String, Boolean, Boolean)} */ + @Deprecated public BitSet getCodepoints(String typeface, Boolean italic, Boolean bold) { final BitSet glyphs = new BitSet(); @@ -399,6 +402,30 @@ public class SlideShowExtractor< return glyphs; } + /** + * Extract the used codepoints for font embedding / subsetting + * @param typeface the typeface/font family of the textruns to examine + * @param italic use {@code true} for italic TextRuns, {@code false} for non-italic ones and + * {@code null} if it doesn't matter + * @param bold use {@code true} for bold TextRuns, {@code false} for non-bold ones and + * {@code null} if it doesn't matter + * @return a bitset with the marked/used codepoints + */ + public SparseBitSet getCodepointsInSparseBitSet(String typeface, Boolean italic, Boolean bold) { + final SparseBitSet glyphs = new SparseBitSet(); + + Predicate filterOld = filter; + try { + filter = o -> filterFonts(o, typeface, italic, bold); + slideshow.getSlides().forEach(slide -> + getText(slide, s -> s.codePoints().forEach(glyphs::set)) + ); + } finally { + filter = filterOld; + } 
+ + return glyphs; + } private static boolean filterFonts(Object o, String typeface, Boolean italic, Boolean bold) { if (!(o instanceof TextRun)) { return false; diff --git a/src/java/org/apache/poi/ss/format/CellNumberFormatter.java b/src/java/org/apache/poi/ss/format/CellNumberFormatter.java index a7e7666758..2464359019 100644 --- a/src/java/org/apache/poi/ss/format/CellNumberFormatter.java +++ b/src/java/org/apache/poi/ss/format/CellNumberFormatter.java @@ -20,7 +20,6 @@ import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.text.FieldPosition; import java.util.ArrayList; -import java.util.BitSet; import java.util.Collections; import java.util.Formatter; import java.util.Iterator; @@ -30,6 +29,7 @@ import java.util.Locale; import java.util.Set; import java.util.TreeSet; +import com.zaxxer.sparsebits.SparseBitSet; import org.apache.poi.util.LocaleUtil; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; @@ -475,7 +475,7 @@ public class CellNumberFormatter extends CellFormatter { Iterator changes = mods.iterator(); CellNumberStringMod nextChange = (changes.hasNext() ? 
changes.next() : null); // records chars already deleted - BitSet deletedChars = new BitSet(); + SparseBitSet deletedChars = new SparseBitSet(); int adjust = 0; for (Special s : specials) { int adjustedPos = s.pos + adjust; diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagePartCollection.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagePartCollection.java index f295aad99c..4580230f3d 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagePartCollection.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagePartCollection.java @@ -18,7 +18,6 @@ package org.apache.poi.openxml4j.opc; import java.io.Serializable; -import java.util.BitSet; import java.util.Collection; import java.util.Collections; import java.util.HashSet; @@ -28,6 +27,7 @@ import java.util.function.ToIntFunction; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.zaxxer.sparsebits.SparseBitSet; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException; @@ -139,6 +139,13 @@ public final class PackagePartCollection implements Serializable { return packagePartLookup.keySet().stream() .mapToInt(indexFromName) - .collect(BitSet::new, BitSet::set, BitSet::or).nextClearBit(1); + .collect(MySparseBitSet::new, MySparseBitSet::set, MySparseBitSet::myOr).nextClearBit(1); } + + /** Bit set exposing {@code or} under the distinct name {@code myOr}, used as the combiner method reference in {@code collect} above; static because it reads no state of the enclosing collection. */ private static class MySparseBitSet extends SparseBitSet { + + public void myOr(MySparseBitSet other) { + this.or(other); + } + } } diff --git a/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSheet.java b/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSheet.java index d76142a4c4..97c490d82e 100644 --- a/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSheet.java +++ b/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSheet.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; -import java.util.BitSet; import
java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -34,6 +33,7 @@ import java.util.Optional; import javax.xml.namespace.QName; +import com.zaxxer.sparsebits.SparseBitSet; import org.apache.poi.ooxml.POIXMLDocumentPart; import org.apache.poi.ooxml.POIXMLException; import org.apache.poi.openxml4j.opc.OPCPackage; @@ -82,7 +82,7 @@ implements XSLFShapeContainer, Sheet { private Map _placeholderByIdMap; private Map _placeholderByTypeMap; - private final BitSet shapeIds = new BitSet(); + private final SparseBitSet shapeIds = new SparseBitSet(); protected XSLFSheet() { super(); diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHyperlinksTable.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHyperlinksTable.java index faa8cdd8d5..ca76fb0332 100644 --- a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHyperlinksTable.java +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBHyperlinksTable.java @@ -21,13 +21,13 @@ import java.io.IOException; import java.io.InputStream; import java.io.Serializable; import java.util.ArrayList; -import java.util.BitSet; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.TreeMap; +import com.zaxxer.sparsebits.SparseBitSet; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackageRelationship; @@ -43,7 +43,7 @@ import org.apache.poi.xssf.usermodel.XSSFRelation; @Internal public class XSSFBHyperlinksTable { - private static final BitSet RECORDS = new BitSet(); + private static final SparseBitSet RECORDS = new SparseBitSet(); static { diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java index f2b0b7be67..398d53bc92 100644 --- a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java @@ -19,8 +19,8 @@ 
package org.apache.poi.xssf.binary; import java.io.IOException; import java.io.InputStream; -import java.util.BitSet; +import com.zaxxer.sparsebits.SparseBitSet; import org.apache.poi.util.IOUtils; import org.apache.poi.util.Internal; import org.apache.poi.util.LittleEndianInputStream; @@ -39,7 +39,7 @@ public abstract class XSSFBParser { private static final int MAX_RECORD_LENGTH = 1_000_000; private final LittleEndianInputStream is; - private final BitSet records; + private final SparseBitSet records; public XSSFBParser(InputStream is) { this.is = new LittleEndianInputStream(is); @@ -51,7 +51,7 @@ public abstract class XSSFBParser { * @param is inputStream * @param bitSet call {@link #handleRecord(int, byte[])} only on those records in this bitSet */ - protected XSSFBParser(InputStream is, BitSet bitSet) { + protected XSSFBParser(InputStream is, SparseBitSet bitSet) { this.is = new LittleEndianInputStream(is); records = bitSet; } diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java index b4f2024a24..ac416bb4d6 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Arrays; -import java.util.BitSet; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; @@ -28,6 +27,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Set; +import com.zaxxer.sparsebits.SparseBitSet; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; @@ -173,7 +173,7 @@ public class XSSFBReader extends XSSFReader { private static class PathExtractor extends XSSFBParser { - private static BitSet RECORDS = new BitSet(); + 
/** Populated by the static initializer that follows; declared final to match the RECORDS constant in XSSFBHyperlinksTable. */ private static final SparseBitSet RECORDS = new SparseBitSet(); static { RECORDS.set(XSSFBRecordType.BrtAbsPath15.getId()); }