BUG 64015 -- swap out java.util.BitSet for zaxxer's SparseBitSet

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1872445 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim Allison 2020-01-07 15:45:07 +00:00
parent fb8f171900
commit e875bd8143
14 changed files with 76 additions and 22 deletions

View File

@ -185,6 +185,7 @@ project('main') {
compile 'com.sun.xml.bind:jaxb-impl:2.3.2'
compile 'com.sun.xml.bind:jaxb-core:2.3.0.1'
compile 'javax.activation:activation:1.1.1'
compile 'com.zaxxer:SparseBitSet:1.2'
testCompile 'junit:junit:4.12'
testCompile 'org.mockito:mockito-core:3.0.0'
@ -236,6 +237,7 @@ project('ooxml') {
compile 'org.apache.santuario:xmlsec:2.1.2'
compile 'org.bouncycastle:bcpkix-jdk15on:1.62'
compile 'com.github.virtuald:curvesapi:1.06'
compile 'com.zaxxer:SparseBitSet:1.2'
// compile only, don't add it to our dist as it blows up the size
compile 'org.apache.xmlgraphics:batik-all:1.11'

View File

@ -218,6 +218,9 @@ under the License.
<property name="main.activation.jar" location="${main.lib}/activation-1.1.1.jar"/>
<property name="main.activation.url"
value="${repository.m2}/maven2/javax/activation/activation/1.1.1/activation-1.1.1.jar"/>
<property name="main.com.zaxxer.jar" location="${main.lib}/SparseBitSet-1.2.jar"/>
<property name="main.com.zaxxer.url"
value="${repository.m2}/maven2/com/zaxxer/SparseBitSet/1.2/SparseBitSet-1.2.jar"/>
<!-- xml signature libs -->
<property name="dsig.xmlsec.jar" location="${compile.lib}/xmlsec-2.1.2.jar"/>
@ -378,6 +381,7 @@ under the License.
<pathelement location="${main.xmlbind-impl.jar}"/>
<pathelement location="${main.xmlbind-core.jar}"/>
<pathelement location="${main.activation.jar}"/>
<pathelement location="${main.com.zaxxer.jar}"/>
</path>
<!-- some libraries should only be required for compiling/running tests -->
@ -752,6 +756,7 @@ under the License.
<available file="${main.xmlbind-impl.jar}"/>
<available file="${main.xmlbind-core.jar}"/>
<available file="${main.activation.jar}"/>
<available file="${main.com.zaxxer.jar}"/>
<!-- we had some CI failures when the extracted files for JaCoCo were missing somehow... -->
<available file="${main.lib}/jacocoagent.jar"/>
@ -788,6 +793,7 @@ under the License.
<downloadfile src="${main.xmlbind-impl.url}" dest="${main.xmlbind-impl.jar}"/>
<downloadfile src="${main.xmlbind-core.url}" dest="${main.xmlbind-core.jar}"/>
<downloadfile src="${main.activation.url}" dest="${main.activation.jar}"/>
<downloadfile src="${main.com.zaxxer.url}" dest="${main.com.zaxxer.jar}"/>
<unzip src="${jacoco.zip}" dest=".">
<patternset>
<include name="lib/*.jar"/>
@ -2038,6 +2044,7 @@ under the License.
<include name="activation-*.jar"/>
<include name="junit-*.jar"/>
<include name="log4j-*.jar"/>
<include name="SparseBitSet-*.jar"/>
</fileset>
<globmapper from="*" to="${zipdir}/lib/*"/>
</mappedresources>
@ -2320,6 +2327,7 @@ under the License.
<auxClasspath path="${main.xmlbind-impl.jar}" />
<auxClasspath path="${main.xmlbind-core.jar}" />
<auxClasspath path="${main.activation.jar}" />
<auxClasspath path="${main.com.zaxxer.jar}" />
<auxClasspath path="${svg.batik-all.jar}"/>
<auxClasspath path="${svg.xml-apis-ext.jar}"/>
<auxClasspath path="${svg.xmlgraphics-commons.jar}"/>

View File

@ -88,6 +88,11 @@
<artifactId>commons-math3</artifactId>
<version>3.6.1</version>
</dependency>
<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>SparseBitSet</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>

View File

@ -125,6 +125,11 @@
<artifactId>commons-codec</artifactId>
<version>1.13</version>
</dependency>
<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>SparseBitSet</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>

View File

@ -19,11 +19,11 @@ package org.apache.poi.ddf;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
import com.zaxxer.sparsebits.SparseBitSet;
import org.apache.poi.common.usermodel.GenericRecord;
import org.apache.poi.util.GenericRecordUtil;
import org.apache.poi.util.LittleEndian;
@ -301,7 +301,7 @@ public final class EscherDggRecord extends EscherRecord {
* @return the next available drawing group id
*/
public short findNewDrawingGroupId() {
BitSet bs = new BitSet();
SparseBitSet bs = new SparseBitSet();
bs.set(0);
for (FileIdCluster fic : field_5_fileIdClusters) {
bs.set(fic.getDrawingGroupId());

View File

@ -25,13 +25,13 @@ import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.security.GeneralSecurityException;
import java.util.BitSet;
import javax.crypto.BadPaddingException;
import javax.crypto.Cipher;
import javax.crypto.IllegalBlockSizeException;
import javax.crypto.ShortBufferException;
import com.zaxxer.sparsebits.SparseBitSet;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSWriterEvent;
@ -56,7 +56,7 @@ public abstract class ChunkedCipherOutputStream extends FilterOutputStream {
private final int chunkBits;
private final byte[] chunk;
private final BitSet plainByteFlags;
private final SparseBitSet plainByteFlags;
private final File fileOut;
private final DirectoryNode dir;
@ -74,7 +74,7 @@ public abstract class ChunkedCipherOutputStream extends FilterOutputStream {
this.chunkSize = chunkSize;
int cs = chunkSize == STREAMING ? 4096 : chunkSize;
this.chunk = IOUtils.safelyAllocate(cs, MAX_RECORD_LENGTH);
this.plainByteFlags = new BitSet(cs);
this.plainByteFlags = new SparseBitSet(cs);
this.chunkBits = Integer.bitCount(cs-1);
this.fileOut = TempFile.createTempFile("encrypted_package", "crypt");
this.fileOut.deleteOnExit();
@ -88,7 +88,7 @@ public abstract class ChunkedCipherOutputStream extends FilterOutputStream {
this.chunkSize = chunkSize;
int cs = chunkSize == STREAMING ? 4096 : chunkSize;
this.chunk = IOUtils.safelyAllocate(cs, MAX_RECORD_LENGTH);
this.plainByteFlags = new BitSet(cs);
this.plainByteFlags = new SparseBitSet(cs);
this.chunkBits = Integer.bitCount(cs-1);
this.fileOut = null;
this.dir = null;
@ -283,7 +283,7 @@ public abstract class ChunkedCipherOutputStream extends FilterOutputStream {
return chunk;
}
protected BitSet getPlainByteFlags() {
protected SparseBitSet getPlainByteFlags() {
return plainByteFlags;
}

View File

@ -21,11 +21,11 @@ import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.security.GeneralSecurityException;
import java.util.BitSet;
import javax.crypto.Cipher;
import javax.crypto.spec.SecretKeySpec;
import com.zaxxer.sparsebits.SparseBitSet;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.poifs.crypt.ChunkedCipherOutputStream;
import org.apache.poi.poifs.crypt.CryptoFunctions;
@ -139,7 +139,7 @@ public class XOREncryptor extends Encryptor {
final int start = Math.max(posInChunk-(recordEnd-recordStart), 0);
final BitSet plainBytes = getPlainByteFlags();
final SparseBitSet plainBytes = getPlainByteFlags();
final byte[] xorArray = getEncryptionInfo().getEncryptor().getSecretKey().getEncoded();
final byte[] chunk = getChunk();
final byte[] plain = (plainBytes.isEmpty()) ? null : chunk.clone();

View File

@ -25,6 +25,7 @@ import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import com.zaxxer.sparsebits.SparseBitSet;
import org.apache.poi.extractor.POITextExtractor;
import org.apache.poi.sl.usermodel.MasterSheet;
import org.apache.poi.sl.usermodel.Notes;
@ -382,7 +383,9 @@ public class SlideShowExtractor<
* @param bold use {@code true} for bold TextRuns, {@code false} for non-bold ones and
* {@code null} if it doesn't matter
* @return a bitset with the marked/used codepoints
* @deprecated use {@link #getCodepointsInSparseBitSet(String, Boolean, Boolean)}
*/
@Deprecated
public BitSet getCodepoints(String typeface, Boolean italic, Boolean bold) {
final BitSet glyphs = new BitSet();
@ -399,6 +402,30 @@ public class SlideShowExtractor<
return glyphs;
}
/**
 * Collects the codepoints used by the text of all slides, restricted to the text runs
 * accepted by the font filter, e.g. for font embedding / subsetting.
 *
 * @param typeface the typeface/font family of the text runs to examine
 * @param italic {@code true} to consider only italic text runs, {@code false} to consider
 *               only non-italic ones, {@code null} if it doesn't matter
 * @param bold {@code true} to consider only bold text runs, {@code false} to consider
 *             only non-bold ones, {@code null} if it doesn't matter
 * @return a sparse bitset in which every marked/used codepoint is set
 */
public SparseBitSet getCodepointsInSparseBitSet(String typeface, Boolean italic, Boolean bold) {
    final SparseBitSet usedCodepoints = new SparseBitSet();
    // remember the currently active filter so it can be reinstated afterwards
    final Predicate<Object> previousFilter = filter;
    filter = candidate -> filterFonts(candidate, typeface, italic, bold);
    try {
        slideshow.getSlides().forEach(slide ->
            getText(slide, text -> text.codePoints().forEach(cp -> usedCodepoints.set(cp))));
    } finally {
        // always restore the previous filter, even if the text extraction fails
        filter = previousFilter;
    }
    return usedCodepoints;
}
private static boolean filterFonts(Object o, String typeface, Boolean italic, Boolean bold) {
if (!(o instanceof TextRun)) {
return false;

View File

@ -20,7 +20,6 @@ import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.FieldPosition;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.Formatter;
import java.util.Iterator;
@ -30,6 +29,7 @@ import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import com.zaxxer.sparsebits.SparseBitSet;
import org.apache.poi.util.LocaleUtil;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
@ -475,7 +475,7 @@ public class CellNumberFormatter extends CellFormatter {
Iterator<CellNumberStringMod> changes = mods.iterator();
CellNumberStringMod nextChange = (changes.hasNext() ? changes.next() : null);
// records chars already deleted
BitSet deletedChars = new BitSet();
SparseBitSet deletedChars = new SparseBitSet();
int adjust = 0;
for (Special s : specials) {
int adjustedPos = s.pos + adjust;

View File

@ -18,7 +18,6 @@
package org.apache.poi.openxml4j.opc;
import java.io.Serializable;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
@ -28,6 +27,7 @@ import java.util.function.ToIntFunction;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.zaxxer.sparsebits.SparseBitSet;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
@ -139,6 +139,13 @@ public final class PackagePartCollection implements Serializable {
return packagePartLookup.keySet().stream()
.mapToInt(indexFromName)
.collect(BitSet::new, BitSet::set, BitSet::or).nextClearBit(1);
.collect(MySparseBitSet::new, MySparseBitSet::set, MySparseBitSet::myOr).nextClearBit(1);
}
private class MySparseBitSet extends SparseBitSet {
public void myOr(MySparseBitSet other) {
this.or(other);
}
}
}

View File

@ -25,7 +25,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@ -34,6 +33,7 @@ import java.util.Optional;
import javax.xml.namespace.QName;
import com.zaxxer.sparsebits.SparseBitSet;
import org.apache.poi.ooxml.POIXMLDocumentPart;
import org.apache.poi.ooxml.POIXMLException;
import org.apache.poi.openxml4j.opc.OPCPackage;
@ -82,7 +82,7 @@ implements XSLFShapeContainer, Sheet<XSLFShape,XSLFTextParagraph> {
private Map<Integer, XSLFSimpleShape> _placeholderByIdMap;
private Map<Integer, XSLFSimpleShape> _placeholderByTypeMap;
private final BitSet shapeIds = new BitSet();
private final SparseBitSet shapeIds = new SparseBitSet();
protected XSLFSheet() {
super();

View File

@ -21,13 +21,13 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import com.zaxxer.sparsebits.SparseBitSet;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
@ -43,7 +43,7 @@ import org.apache.poi.xssf.usermodel.XSSFRelation;
@Internal
public class XSSFBHyperlinksTable {
private static final BitSet RECORDS = new BitSet();
private static final SparseBitSet RECORDS = new SparseBitSet();
static {

View File

@ -19,8 +19,8 @@ package org.apache.poi.xssf.binary;
import java.io.IOException;
import java.io.InputStream;
import java.util.BitSet;
import com.zaxxer.sparsebits.SparseBitSet;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndianInputStream;
@ -39,7 +39,7 @@ public abstract class XSSFBParser {
private static final int MAX_RECORD_LENGTH = 1_000_000;
private final LittleEndianInputStream is;
private final BitSet records;
private final SparseBitSet records;
public XSSFBParser(InputStream is) {
this.is = new LittleEndianInputStream(is);
@ -51,7 +51,7 @@ public abstract class XSSFBParser {
* @param is inputStream
* @param bitSet call {@link #handleRecord(int, byte[])} only on those records in this bitSet
*/
protected XSSFBParser(InputStream is, BitSet bitSet) {
protected XSSFBParser(InputStream is, SparseBitSet bitSet) {
this.is = new LittleEndianInputStream(is);
records = bitSet;
}

View File

@ -20,7 +20,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
@ -28,6 +27,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import com.zaxxer.sparsebits.SparseBitSet;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
@ -173,7 +173,7 @@ public class XSSFBReader extends XSSFReader {
private static class PathExtractor extends XSSFBParser {
private static BitSet RECORDS = new BitSet();
private static SparseBitSet RECORDS = new SparseBitSet();
static {
RECORDS.set(XSSFBRecordType.BrtAbsPath15.getId());
}