#64693 - POI HwmfGraphics cannot read the embedded document title

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1881322 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2020-08-30 11:18:21 +00:00
parent f89528addc
commit b00ca445b2
23 changed files with 348 additions and 71 deletions

View File

@ -28,11 +28,11 @@ public interface Drawable {
/**
 * Creates a hint key identified by the given numeric id.
 *
 * @param id the numeric id, forwarded unchanged to the superclass constructor
 */
protected DrawableHint(int id) {
super(id);
}
/**
 * Accepts every value for this hint; no type or range check is performed.
 *
 * @param val the candidate hint value (not inspected)
 * @return always {@code true}
 */
public boolean isCompatibleValue(Object val) {
return true;
}
public String toString() {
switch (intKey()) {
case 1: return "DRAW_FACTORY";
@ -48,11 +48,12 @@ public interface Drawable {
case 11: return "GRESTORE";
case 12: return "CURRENT_SLIDE";
case 13: return "BUFFERED_IMAGE";
case 14: return "DEFAULT_CHARSET";
default: return "UNKNOWN_ID "+intKey();
}
}
}
/**
* {@link DrawFactory} which will be used to draw objects into this graphics context
*/
@ -96,7 +97,7 @@ public interface Drawable {
* Internal key for caching the preset geometries
*/
DrawableHint PRESET_GEOMETRY_CACHE = new DrawableHint(6);
/**
* draw text via {@link java.awt.Graphics2D#drawString(java.text.AttributedCharacterIterator, float, float)}
*/
@ -110,11 +111,11 @@ public interface Drawable {
/**
* Use this object to resolve unknown / missing fonts when rendering slides.
* The font handler must be of type {@link DrawFontManager}.<p>
*
* In case a {@code FONT_HANDLER} is registered, {@code FONT_FALLBACK} and {@code FONT_MAP} are ignored
*
* In case a {@code FONT_HANDLER} is registered, {@code FONT_FALLBACK} and {@code FONT_MAP} are ignored
*/
DrawableHint FONT_HANDLER = new DrawableHint(7);
/**
* Key for a font fallback map of type {@code Map<String,String>} which maps
* the original font family (key) to the fallback font family (value).
@ -128,10 +129,10 @@ public interface Drawable {
* the original font family (key) to the mapped font family (value)
*/
DrawableHint FONT_MAP = new DrawableHint(9);
DrawableHint GSAVE = new DrawableHint(10);
DrawableHint GRESTORE = new DrawableHint(11);
/**
* The Common SL Draw API works sometimes cascading, but there are places
* where the current slide context needs to be evaluated, e.g. when slide numbers
@ -145,24 +146,32 @@ public interface Drawable {
*/
DrawableHint BUFFERED_IMAGE = new DrawableHint(13);
/**
* Sets the default charset to render text elements.
* As opposed to other Windows-related libraries in POI, this simply defaults to Windows-1252.
* The rendering value is of type {@link java.nio.charset.Charset}
*/
DrawableHint DEFAULT_CHARSET = new DrawableHint(14);
/**
* Apply 2-D transforms before drawing this shape. This includes rotation and flipping.
*
* @param graphics the graphics whose transform matrix will be modified
*/
void applyTransform(Graphics2D graphics);
/**
* Draw this shape into the supplied canvas
*
* @param graphics the graphics to draw into
*/
void draw(Graphics2D graphics);
/**
* draw any content within this shape (image, text, etc.).
*
* @param graphics the graphics to draw into
*/
void drawContent(Graphics2D graphics);
void drawContent(Graphics2D graphics);
}

View File

@ -25,6 +25,7 @@ import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import org.apache.poi.common.usermodel.GenericRecord;
import org.apache.poi.util.Dimension2DDouble;
@ -154,4 +155,11 @@ public interface ImageRenderer {
default GenericRecord getGenericRecord() { return null; }
/**
* Sets the default charset to render text elements.
* As opposed to other Windows-related libraries in POI, this simply defaults to Windows-1252.
*
* @param defaultCharset the default charset
*/
default void setDefaultCharset(Charset defaultCharset) {}
}

View File

@ -26,6 +26,12 @@ import java.awt.geom.Path2D;
import java.awt.geom.PathIterator;
import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.awt.image.ColorModel;
import java.awt.image.ComponentColorModel;
import java.awt.image.DirectColorModel;
import java.awt.image.IndexColorModel;
import java.awt.image.PackedColorModel;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
@ -58,6 +64,7 @@ public class GenericRecordJsonWriter implements Closeable {
private static final Pattern ESC_CHARS = Pattern.compile("[\"\\p{Cntrl}\\\\]");
private static final String NL = System.getProperty("line.separator");
@FunctionalInterface
protected interface GenericRecordHandler {
/**
@ -92,6 +99,7 @@ public class GenericRecordJsonWriter implements Closeable {
handler(Path2D.class, GenericRecordJsonWriter::printPath);
handler(AffineTransform.class, GenericRecordJsonWriter::printAffineTransform);
handler(Color.class, GenericRecordJsonWriter::printColor);
handler(BufferedImage.class, GenericRecordJsonWriter::printImage);
handler(Array.class, GenericRecordJsonWriter::printArray);
handler(Object.class, GenericRecordJsonWriter::printObject);
}
@ -483,6 +491,40 @@ public class GenericRecordJsonWriter implements Closeable {
return true;
}
/**
 * Emits a compact JSON object describing a {@link BufferedImage}: its dimensions,
 * image type, color model kind, pixel size, component count, color space,
 * transparency mode and alpha flag. Pixel data is not written.
 *
 * @param name the property name, handed to {@code printName} before the object is written
 * @param o the value to print; must be a {@link BufferedImage}
 * @return always {@code true}
 */
protected boolean printImage(String name, Object o) {
    final BufferedImage image = (BufferedImage)o;

    // indexed by BufferedImage.getType()
    final String[] imageTypeNames = {
        "CUSTOM","INT_RGB","INT_ARGB","INT_ARGB_PRE","INT_BGR","3BYTE_BGR","4BYTE_ABGR","4BYTE_ABGR_PRE",
        "USHORT_565_RGB","USHORT_555_RGB","BYTE_GRAY","USHORT_GRAY","BYTE_BINARY","BYTE_INDEXED"
    };
    // indexed by ColorSpace.getType(); everything from index 12 upwards is reported as "Unknown"
    final String[] colorSpaceNames = {
        "XYZ","Lab","Luv","YCbCr","Yxy","RGB","GRAY","HSV","HLS","CMYK","Unknown","CMY","Unknown"
    };

    printName(name);

    final ColorModel model = image.getColorModel();

    // DirectColorModel is tested before PackedColorModel, same order as the checks always had
    final String modelKind;
    if (model instanceof IndexColorModel) {
        modelKind = "indexed";
    } else if (model instanceof ComponentColorModel) {
        modelKind = "component";
    } else if (model instanceof DirectColorModel) {
        modelKind = "direct";
    } else if (model instanceof PackedColorModel) {
        modelKind = "packed";
    } else {
        modelKind = "unknown";
    }

    final StringBuilder json = new StringBuilder();
    json.append("{ \"width\": ").append(image.getWidth());
    json.append(", \"height\": ").append(image.getHeight());
    json.append(", \"type\": \"").append(imageTypeNames[image.getType()]).append("\"");
    json.append(", \"colormodel\": \"").append(modelKind).append("\"");
    json.append(", \"pixelBits\": ").append(model.getPixelSize());
    json.append(", \"numComponents\": ").append(model.getNumComponents());
    json.append(", \"colorSpace\": \"")
        .append(colorSpaceNames[Math.min(model.getColorSpace().getType(), 12)])
        .append("\"");
    json.append(", \"transparency\": ").append(model.getTransparency());
    json.append(", \"alpha\": ").append(model.hasAlpha());
    json.append("}");

    fw.write(json.toString());
    return true;
}
static String trimHex(final long l, final int size) {
final String b = Long.toHexString(l);
int len = b.length();

View File

@ -738,7 +738,9 @@ public class XSLFTextRun implements TextRun {
// SYMBOL is missing
if (font == null || !font.isSetTypeface() || "".equals(font.getTypeface())) {
font = coll.getLatin();
// don't fallback to latin but bubble up in the style hierarchy (slide -> layout -> master -> theme)
return null;
// font = coll.getLatin();
}
}

View File

@ -25,6 +25,7 @@ import java.awt.geom.Rectangle2D;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Collections;
import org.apache.poi.common.usermodel.GenericRecord;
@ -111,4 +112,9 @@ class EMFHandler extends MFProxy {
? ((EmbeddedExtractor) imgr).getEmbeddings()
: Collections.emptyList();
}
/**
 * Forwards the default charset to the wrapped image renderer {@code imgr}.
 *
 * @param charset the charset passed on to {@code imgr.setDefaultCharset}
 */
@Override
void setDefaultCharset(Charset charset) {
imgr.setDefaultCharset(charset);
}
}

View File

@ -25,6 +25,7 @@ import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Set;
@ -64,4 +65,6 @@ abstract class MFProxy implements Closeable {
abstract GenericRecord getRoot();
abstract Iterable<EmbeddedPart> getEmbeddings(int slideNo);
/**
 * Sets the default charset for this proxy; what is done with it is up to the
 * concrete handler.
 *
 * @param charset the default charset
 */
abstract void setDefaultCharset(Charset charset);
}

View File

@ -27,6 +27,7 @@ import java.awt.geom.Dimension2D;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
@ -49,12 +50,14 @@ import org.apache.poi.sl.usermodel.SlideShow;
import org.apache.poi.sl.usermodel.SlideShowFactory;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LocaleUtil;
/** Handler for ppt and pptx files */
@Internal
class PPTHandler extends MFProxy {
private SlideShow<?,?> ppt;
private Slide<?,?> slide;
private Charset defaultCharset = LocaleUtil.CHARSET_1252;
@Override
public void parse(File file) throws IOException {
@ -67,6 +70,9 @@ class PPTHandler extends MFProxy {
throw e;
}
}
if (ppt == null) {
throw new IOException("Unknown file format or missing poi-scratchpad.jar / poi-ooxml.jar");
}
slide = ppt.getSlides().get(0);
}
@ -81,6 +87,9 @@ class PPTHandler extends MFProxy {
throw e;
}
}
if (ppt == null) {
throw new IOException("Unknown file format or missing poi-scratchpad.jar / poi-ooxml.jar");
}
slide = ppt.getSlides().get(0);
}
@ -162,8 +171,8 @@ class PPTHandler extends MFProxy {
;
}
private static EmbeddedPart fromObjectShape(Shape s) {
final ObjectShape os = (ObjectShape)s;
private static EmbeddedPart fromObjectShape(Shape<?,?> s) {
final ObjectShape<?,?> os = (ObjectShape<?,?>)s;
final ObjectData od = os.getObjectData();
EmbeddedPart embed = new EmbeddedPart();
embed.setName(od.getFileName());
@ -177,4 +186,8 @@ class PPTHandler extends MFProxy {
});
return embed;
}
/**
 * Stores the default charset in this handler's {@code defaultCharset} field.
 * Previously the argument was silently dropped, so the field always kept its
 * initial Windows-1252 value.
 *
 * @param charset the default charset; {@code null} resets to Windows-1252
 */
@Override
void setDefaultCharset(Charset charset) {
    // fall back to the same value the field is initialized with
    defaultCharset = (charset != null) ? charset : LocaleUtil.CHARSET_1252;
}
}

View File

@ -27,15 +27,18 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.poi.common.usermodel.GenericRecord;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.sl.draw.Drawable;
import org.apache.poi.sl.draw.EmbeddedExtractor.EmbeddedPart;
import org.apache.poi.util.Dimension2DDouble;
import org.apache.poi.util.GenericRecordJsonWriter;
import org.apache.poi.util.LocaleUtil;
import org.apache.poi.xslf.util.OutputFormat.BitmapFormat;
import org.apache.poi.xslf.util.OutputFormat.SVGFormat;
@ -71,7 +74,8 @@ public final class PPTX2PNG {
" -inputType <type> default input file type (OLE2,WMF,EMF), default is OLE2 = Powerpoint\n" +
" some files (usually wmf) don't have a header, i.e. an identifiable file magic\n" +
" -textAsShapes text elements are saved as shapes in SVG, necessary for variable spacing\n" +
" often found in math formulas";
" often found in math formulas\n" +
" -charset sets the default charset to be used, defaults to Windows-1252";
System.out.println(msg);
// no System.exit here, as we also run in junit tests!
@ -99,6 +103,7 @@ public final class PPTX2PNG {
private boolean extractEmbedded = false;
private FileMagic defaultFileType = FileMagic.OLE2;
private boolean textAsShapes = false;
private Charset charset = LocaleUtil.CHARSET_1252;
private PPTX2PNG() {
}
@ -176,6 +181,15 @@ public final class PPTX2PNG {
case "-extractEmbedded":
extractEmbedded = true;
break;
case "-charset":
if (opt != null) {
charset = Charset.forName(opt);
i++;
} else {
charset = LocaleUtil.CHARSET_1252;
}
break;
default:
file = new File(args[i]);
break;
@ -264,6 +278,7 @@ public final class PPTX2PNG {
graphics.setRenderingHint(RenderingHints.KEY_COLOR_RENDERING, RenderingHints.VALUE_COLOR_RENDER_SPEED);
graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
graphics.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS, RenderingHints.VALUE_FRACTIONALMETRICS_ON);
graphics.setRenderingHint(Drawable.DEFAULT_CHARSET, getDefaultCharset());
graphics.scale(scale / lenSide, scale / lenSide);
@ -315,7 +330,7 @@ public final class PPTX2PNG {
}
private void dumpRecords(MFProxy proxy) throws IOException {
if (dumpfile == null) {
if (dumpfile == null || "null".equals(dumpfile.getPath())) {
return;
}
GenericRecord gr = proxy.getRoot();
@ -387,6 +402,7 @@ public final class PPTX2PNG {
proxy.setQuite(quiet);
proxy.parse(file);
}
proxy.setDefaultCharset(charset);
return proxy;
}
@ -400,6 +416,10 @@ public final class PPTX2PNG {
return INPUT_PATTERN.matcher(inname).replaceAll(outpat);
}
/**
 * Returns the charset stored in the {@code charset} field, which is initialized
 * to Windows-1252 and overwritten by the {@code -charset} command line option.
 *
 * @return the currently configured default charset
 */
private Charset getDefaultCharset() {
return charset;
}
static class NoScratchpadException extends IOException {
NoScratchpadException() {
}

View File

@ -19,10 +19,13 @@
package org.apache.poi.xslf.usermodel;
import static java.util.Arrays.asList;
import static org.junit.Assume.assumeFalse;
import java.io.File;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
@ -39,13 +42,14 @@ import org.junit.runners.Parameterized.Parameters;
/**
* Test class for testing PPTX2PNG utility which renders .ppt and .pptx slideshows
*/
@SuppressWarnings("ConstantConditions")
@RunWith(Parameterized.class)
public class TestPPTX2PNG {
private static boolean xslfOnly;
private static final POIDataSamples samples = POIDataSamples.getSlideShowInstance();
private static final File basedir = null;
private static final String files =
"53446.ppt, alterman_security.ppt, alterman_security.pptx, KEY02.pptx, themes.pptx, " +
"bug64693.pptx, 53446.ppt, alterman_security.ppt, alterman_security.pptx, KEY02.pptx, themes.pptx, " +
"backgrounds.pptx, layouts.pptx, sample.pptx, shapes.pptx, 54880_chinese.ppt, keyframes.pptx," +
"customGeo.pptx, customGeo.ppt, wrench.emf, santa.wmf, missing-moveto.ppt";
@ -62,7 +66,6 @@ public class TestPPTX2PNG {
@Parameter
public String pptFile;
@SuppressWarnings("ConstantConditions")
@Parameters(name="{0}")
public static Collection<String> data() {
Function<String, Stream<String>> fun = (basedir == null) ? Stream::of :
@ -75,7 +78,9 @@ public class TestPPTX2PNG {
public void render() throws Exception {
assumeFalse("ignore HSLF (.ppt) / HEMF (.emf) / HWMF (.wmf) files in no-scratchpad run", xslfOnly && pptFile.matches(".*\\.(ppt|emf|wmf)$"));
String[] args = {
// bug64693.pptx
final List<String> args = new ArrayList<>(asList(
"-format", "null", // png,gif,jpg,svg or null for test
"-slide", "-1", // -1 for all
"-outdir", new File("build/tmp/").getCanonicalPath(),
@ -84,10 +89,17 @@ public class TestPPTX2PNG {
"-dump", "null",
"-quiet",
"-fixside", "long",
"-scale", "800",
// "-scale", "1.333333333",
(basedir == null ? samples.getFile(pptFile) : new File(basedir, pptFile)).getAbsolutePath()
};
PPTX2PNG.main(args);
"-scale", "800"
));
if ("bug64693.pptx".equals(pptFile)) {
args.addAll(asList(
"-charset", "GBK"
));
}
args.add((basedir == null ? samples.getFile(pptFile) : new File(basedir, pptFile)).getAbsolutePath());
PPTX2PNG.main(args.toArray(new String[0]));
}
}

View File

@ -139,4 +139,7 @@ java.lang.String#toString()
javax.xml.bind.DatatypeConverter
@defaultMessage don't rely on the threads ContextClassLoader - provide the classloader via load(Class, Classloader)
java.util.ServiceLoader#load(java.lang.Class)
java.util.ServiceLoader#load(java.lang.Class)
@defaultMessage use java.nio.charset.StandardCharsets instead
org.apache.commons.codec.Charsets

View File

@ -28,12 +28,14 @@ import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import org.apache.poi.common.usermodel.GenericRecord;
import org.apache.poi.hemf.usermodel.HemfPicture;
import org.apache.poi.hwmf.draw.HwmfGraphicsState;
import org.apache.poi.hwmf.draw.HwmfImageRenderer;
import org.apache.poi.sl.draw.BitmapImageRenderer;
import org.apache.poi.sl.draw.Drawable;
import org.apache.poi.sl.draw.EmbeddedExtractor;
import org.apache.poi.sl.draw.ImageRenderer;
import org.apache.poi.sl.usermodel.PictureData;
@ -43,6 +45,7 @@ import org.apache.poi.util.Units;
public class HemfImageRenderer implements ImageRenderer, EmbeddedExtractor {
HemfPicture image;
double alpha;
boolean charsetInitialized = false;
@Override
public boolean canRender(String contentType) {
@ -104,6 +107,11 @@ public class HemfImageRenderer implements ImageRenderer, EmbeddedExtractor {
return false;
}
Charset cs = (Charset)graphics.getRenderingHint(Drawable.DEFAULT_CHARSET);
if (cs != null && !charsetInitialized) {
setDefaultCharset(cs);
}
HwmfGraphicsState graphicsState = new HwmfGraphicsState();
graphicsState.backup(graphics);
@ -141,4 +149,10 @@ public class HemfImageRenderer implements ImageRenderer, EmbeddedExtractor {
public Rectangle2D getBounds() {
return Units.pointsToPixel(image == null ? new Rectangle2D.Double() : image.getBoundsInPoints());
}
/**
 * Sets the default charset on the loaded picture and marks the charset as
 * initialized, so {@code drawImage} no longer overrides it with the
 * {@code Drawable.DEFAULT_CHARSET} rendering hint.
 * <p>
 * If no picture is loaded ({@code image == null}) the call is ignored instead of
 * failing with a {@link NullPointerException}; the initialized flag stays unset.
 *
 * @param defaultCharset the default charset
 */
@Override
public void setDefaultCharset(Charset defaultCharset) {
    if (image == null) {
        // nothing to configure yet - same tolerance for a missing picture as in getBounds()
        return;
    }
    image.setDefaultCharset(defaultCharset);
    charsetInitialized = true;
}
}

View File

@ -19,6 +19,7 @@ package org.apache.poi.hemf.record.emf;
import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
@ -33,6 +34,7 @@ import org.apache.poi.hemf.draw.HemfGraphics;
import org.apache.poi.hemf.draw.HemfGraphics.EmfRenderState;
import org.apache.poi.hemf.record.emfplus.HemfPlusRecord;
import org.apache.poi.hemf.record.emfplus.HemfPlusRecordIterator;
import org.apache.poi.hwmf.usermodel.HwmfCharsetAware;
import org.apache.poi.hwmf.usermodel.HwmfPicture;
import org.apache.poi.util.GenericRecordJsonWriter;
import org.apache.poi.util.GenericRecordUtil;
@ -105,7 +107,7 @@ public class HemfComment {
}
}
public static class EmfComment implements HemfRecord {
public static class EmfComment implements HemfRecord, HwmfCharsetAware {
private EmfCommentData data;
@Override
@ -146,6 +148,13 @@ public class HemfComment {
}
assert(commentIdentifier == commentType.id);
}
/**
 * Passes the charset provider on to the comment payload, provided the payload
 * itself implements {@link HwmfCharsetAware}; otherwise the call has no effect.
 *
 * @param provider the supplier of the charset to use
 */
@Override
public void setCharsetProvider(Supplier<Charset> provider) {
    if (!(data instanceof HwmfCharsetAware)) {
        return;
    }
    final HwmfCharsetAware charsetAwareData = (HwmfCharsetAware)data;
    charsetAwareData.setCharsetProvider(provider);
}
}
public static class EmfCommentDataIterator implements Iterator<EmfCommentData> {
@ -250,8 +259,9 @@ public class HemfComment {
* Private data is unknown to EMF; it is meaningful only to applications that know the format of the
* data and how to use it. EMR_COMMENT private data records MAY be ignored.
*/
public static class EmfCommentDataGeneric implements EmfCommentData {
public static class EmfCommentDataGeneric implements EmfCommentData, HwmfCharsetAware {
private byte[] privateData;
private Supplier<Charset> charsetProvider = () -> LocaleUtil.CHARSET_1252;
@Override
public HemfCommentRecordType getCommentRecordType() {
@ -275,7 +285,7 @@ public class HemfComment {
}
public String getPrivateDataAsString() {
return new String(privateData, LocaleUtil.CHARSET_1252);
return new String(privateData, charsetProvider.get());
}
@Override
@ -285,6 +295,11 @@ public class HemfComment {
"privateDataAsString", this::getPrivateDataAsString
);
}
/**
 * Replaces the charset provider, which initially supplies Windows-1252.
 * The provider is queried each time {@code getPrivateDataAsString()} decodes the private data.
 *
 * @param provider the supplier of the charset to use
 */
@Override
public void setCharsetProvider(Supplier<Charset> provider) {
charsetProvider = provider;
}
}
/** The EMR_COMMENT_EMFPLUS record contains embedded EMF+ records. */

View File

@ -206,6 +206,7 @@ public class HemfText {
// the axis to convert from page space units to .01mm units.
// This SHOULD be used only if the graphics mode specified by iGraphicsMode is GM_COMPATIBLE.
Dimension2D scl = graphicsMode == EmfGraphicsMode.GM_COMPATIBLE ? scale : null;
ctx.setCharsetProvider(charsetProvider);
ctx.drawString(rawTextBytes, stringLength, reference, scl, bounds, options, dx, isUnicode());
}

View File

@ -28,6 +28,7 @@ import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
@ -37,7 +38,6 @@ import java.util.PrimitiveIterator.OfInt;
import java.util.function.BiFunction;
import java.util.function.Supplier;
import org.apache.commons.codec.Charsets;
import org.apache.commons.math3.linear.LUDecomposition;
import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;
@ -748,12 +748,12 @@ public final class HemfPlusDraw {
}
if (REALIZED_ADVANCE.isSet(optionsFlags)) {
byte[] buf = glyphs.getBytes(Charsets.UTF_16LE);
byte[] buf = glyphs.getBytes(StandardCharsets.UTF_16LE);
ctx.drawString(buf, buf.length, glyphPos.get(0), null, null, null, null, true);
} else {
final OfInt glyphIter = glyphs.codePoints().iterator();
glyphPos.forEach(p -> {
byte[] buf = new String(new int[]{glyphIter.next()}, 0, 1).getBytes(Charsets.UTF_16LE);
byte[] buf = new String(new int[]{glyphIter.next()}, 0, 1).getBytes(StandardCharsets.UTF_16LE);
ctx.drawString(buf, buf.length, p, null, null, null, null, true);
});
}

View File

@ -26,6 +26,7 @@ import java.awt.geom.AffineTransform;
import java.awt.geom.Dimension2D;
import java.awt.geom.Rectangle2D;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@ -41,10 +42,12 @@ import org.apache.poi.hemf.record.emf.HemfHeader;
import org.apache.poi.hemf.record.emf.HemfRecord;
import org.apache.poi.hemf.record.emf.HemfRecordIterator;
import org.apache.poi.hemf.record.emf.HemfWindowing;
import org.apache.poi.hwmf.usermodel.HwmfCharsetAware;
import org.apache.poi.hwmf.usermodel.HwmfEmbedded;
import org.apache.poi.util.Dimension2DDouble;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndianInputStream;
import org.apache.poi.util.LocaleUtil;
import org.apache.poi.util.Units;
/**
@ -55,6 +58,7 @@ public class HemfPicture implements Iterable<HemfRecord>, GenericRecord {
private final LittleEndianInputStream stream;
private final List<HemfRecord> records = new ArrayList<>();
private boolean isParsed = false;
private Charset defaultCharset = LocaleUtil.CHARSET_1252;
public HemfPicture(InputStream is) {
this(new LittleEndianInputStream(is));
@ -79,6 +83,9 @@ public class HemfPicture implements Iterable<HemfRecord>, GenericRecord {
header[0] = (HemfHeader) r;
}
r.setHeader(header[0]);
if (r instanceof HwmfCharsetAware) {
((HwmfCharsetAware)r).setCharsetProvider(this::getDefaultCharset);
}
records.add(r);
});
}
@ -199,4 +206,12 @@ public class HemfPicture implements Iterable<HemfRecord>, GenericRecord {
public Map<String, Supplier<?>> getGenericProperties() {
return null;
}
/**
 * Sets the default charset of this picture. Charset-aware records receive
 * {@code this::getDefaultCharset} as their provider while the records are read,
 * so they observe the value that is current when they query it.
 *
 * @param defaultCharset the default charset
 */
public void setDefaultCharset(Charset defaultCharset) {
this.defaultCharset = defaultCharset;
}
/**
 * @return the default charset of this picture; Windows-1252 unless changed via
 *         {@code setDefaultCharset}
 */
public Charset getDefaultCharset() {
return defaultCharset;
}
}

View File

@ -40,6 +40,7 @@ import java.awt.geom.Point2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.AttributedString;
import java.util.ArrayList;
import java.util.BitSet;
@ -51,8 +52,9 @@ import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.TreeMap;
import java.util.function.BiConsumer;
import java.util.function.Supplier;
import org.apache.commons.codec.Charsets;
import org.apache.poi.common.usermodel.fonts.FontCharset;
import org.apache.poi.common.usermodel.fonts.FontInfo;
import org.apache.poi.hwmf.record.HwmfBrushStyle;
import org.apache.poi.hwmf.record.HwmfFont;
@ -64,6 +66,7 @@ import org.apache.poi.hwmf.record.HwmfPenStyle;
import org.apache.poi.hwmf.record.HwmfPenStyle.HwmfLineDash;
import org.apache.poi.hwmf.record.HwmfRegionMode;
import org.apache.poi.hwmf.record.HwmfText.WmfExtTextOutOptions;
import org.apache.poi.hwmf.usermodel.HwmfCharsetAware;
import org.apache.poi.sl.draw.BitmapImageRenderer;
import org.apache.poi.sl.draw.DrawFactory;
import org.apache.poi.sl.draw.DrawFontManager;
@ -72,7 +75,7 @@ import org.apache.poi.sl.draw.ImageRenderer;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LocaleUtil;
public class HwmfGraphics {
public class HwmfGraphics implements HwmfCharsetAware {
public enum FillDrawStyle {
NONE(FillDrawStyle::fillNone),
@ -128,9 +131,9 @@ public class HwmfGraphics {
private final AffineTransform initialAT = new AffineTransform();
private static final Charset DEFAULT_CHARSET = LocaleUtil.CHARSET_1252;
/** Bounding box from the placeable header */
private final Rectangle2D bbox;
private Supplier<Charset> charsetProvider = () -> LocaleUtil.CHARSET_1252;
/**
* Initialize a graphics context for wmf rendering
@ -595,16 +598,26 @@ public class HwmfGraphics {
}
}
private static Charset getCharset(HwmfFont font, boolean isUnicode) {
private Charset getCharset(HwmfFont font, boolean isUnicode) {
if (isUnicode) {
return Charsets.UTF_16LE;
return StandardCharsets.UTF_16LE;
}
Charset charset = font.getCharset().getCharset();
return (charset == null) ? DEFAULT_CHARSET : charset;
FontCharset fc = font.getCharset();
if (fc == FontCharset.DEFAULT) {
return charsetProvider.get();
}
Charset charset = fc.getCharset();
return (charset == null) ? charsetProvider.get() : charset;
}
private static String trimText(HwmfFont font, boolean isUnicode, byte[] text, int length) {
/**
 * Replaces the charset provider, which initially supplies Windows-1252.
 * {@code getCharset} consults it for non-unicode text when the font charset is
 * {@code FontCharset.DEFAULT} or maps to no Java charset.
 *
 * @param provider the supplier of the fallback charset
 */
@Override
public void setCharsetProvider(Supplier<Charset> provider) {
charsetProvider = provider;
}
private String trimText(HwmfFont font, boolean isUnicode, byte[] text, int length) {
final Charset charset = getCharset(font, isUnicode);
int trimLen;
@ -717,7 +730,9 @@ public class HwmfGraphics {
graphicsCtx.fill(dstBounds);
break;
default:
case SRCAND:
case SRCCOPY:
case SRCINVERT:
if (img == null) {
return;
}
@ -746,7 +761,20 @@ public class HwmfGraphics {
// the difference is, that clippings are 0-based, whereas the srcBounds are absolute in the user-space
// of the referenced image and can be also negative
Composite old = graphicsCtx.getComposite();
graphicsCtx.setComposite(AlphaComposite.getInstance(AlphaComposite.SRC_OVER));
int newComp;
switch (prop.getRasterOp()) {
default:
case SRCCOPY:
newComp = AlphaComposite.SRC_OVER;
break;
case SRCINVERT:
newComp = AlphaComposite.SRC_IN;
break;
case SRCAND:
newComp = AlphaComposite.SRC;
break;
}
graphicsCtx.setComposite(AlphaComposite.getInstance(newComp));
boolean useDeviceBounds = (img instanceof HwmfImageRenderer);

View File

@ -27,6 +27,7 @@ import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Iterator;
import org.apache.poi.common.usermodel.GenericRecord;
@ -34,6 +35,7 @@ import org.apache.poi.hwmf.usermodel.HwmfEmbedded;
import org.apache.poi.hwmf.usermodel.HwmfPicture;
import org.apache.poi.sl.draw.BitmapImageRenderer;
import org.apache.poi.sl.draw.DrawPictureShape;
import org.apache.poi.sl.draw.Drawable;
import org.apache.poi.sl.draw.EmbeddedExtractor;
import org.apache.poi.sl.draw.ImageRenderer;
import org.apache.poi.sl.usermodel.PictureData.PictureType;
@ -47,6 +49,7 @@ import org.apache.poi.util.Units;
public class HwmfImageRenderer implements ImageRenderer, EmbeddedExtractor {
HwmfPicture image;
double alpha;
boolean charsetInitialized = false;
@Override
public boolean canRender(String contentType) {
@ -87,9 +90,9 @@ public class HwmfImageRenderer implements ImageRenderer, EmbeddedExtractor {
@Override
public BufferedImage getImage(Dimension2D dim) {
if (image == null) {
return new BufferedImage(1, 1, BufferedImage.TYPE_INT_ARGB);
return new BufferedImage(1, 1, BufferedImage.TYPE_INT_ARGB);
}
BufferedImage bufImg = new BufferedImage((int)dim.getWidth(), (int)dim.getHeight(), BufferedImage.TYPE_INT_ARGB);
Graphics2D g = bufImg.createGraphics();
g.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
@ -101,7 +104,7 @@ public class HwmfImageRenderer implements ImageRenderer, EmbeddedExtractor {
return BitmapImageRenderer.setAlpha(bufImg, alpha);
}
@Override
public boolean drawImage(Graphics2D graphics, Rectangle2D anchor) {
return drawImage(graphics, anchor, null);
@ -113,6 +116,11 @@ public class HwmfImageRenderer implements ImageRenderer, EmbeddedExtractor {
return false;
}
Charset cs = (Charset)graphics.getRenderingHint(Drawable.DEFAULT_CHARSET);
if (cs != null && !charsetInitialized) {
setDefaultCharset(cs);
}
HwmfGraphicsState graphicsState = new HwmfGraphicsState();
graphicsState.backup(graphics);
@ -185,4 +193,10 @@ public class HwmfImageRenderer implements ImageRenderer, EmbeddedExtractor {
public Rectangle2D getBounds() {
return Units.pointsToPixel(image == null ? new Rectangle2D.Double() : image.getBoundsInPoints());
}
/**
 * Sets the default charset on the loaded picture and marks the charset as
 * initialized, so {@code drawImage} no longer overrides it with the
 * {@code Drawable.DEFAULT_CHARSET} rendering hint.
 * <p>
 * If no picture is loaded ({@code image == null}) the call is ignored instead of
 * failing with a {@link NullPointerException}; the initialized flag stays unset.
 *
 * @param defaultCharset the default charset
 */
@Override
public void setDefaultCharset(Charset defaultCharset) {
    if (image == null) {
        // nothing to configure yet - same tolerance for a missing picture as in getBounds() / getImage()
        return;
    }
    image.setDefaultCharset(defaultCharset);
    charsetInitialized = true;
}
}

View File

@ -485,11 +485,14 @@ public class HwmfBitmapDib implements GenericRecord {
if (foreground != null && background != null && headerBitCount == HwmfBitmapDib.BitCount.BI_BITCOUNT_1) {
IndexColorModel cmOld = (IndexColorModel)bi.getColorModel();
int transPixel = hasAlpha ? (((cmOld.getRGB(0) & 0xFFFFFF) == 0) ? 0 : 1) : -1;
int fg = foreground.getRGB();
int bg = background.getRGB() & (hasAlpha ? 0xFFFFFF : 0xFFFFFFFF);
boolean ordered = (cmOld.getRGB(0) & 0xFFFFFF) == (bg & 0xFFFFFF);
int transPixel = ordered ? 0 : 1;
int[] cmap = ordered ? new int[]{ bg, fg } : new int[]{ fg, bg };
int transferType = bi.getData().getTransferType();
int fg = foreground.getRGB(), bg = background.getRGB();
int[] cmap = { (transPixel == 0 ? bg : fg), (transPixel == 1 ? bg : fg) };
IndexColorModel cmNew = new IndexColorModel(1, cmap.length, cmap, 0, hasAlpha, transPixel, transferType);
IndexColorModel cmNew = new IndexColorModel(1, 2, cmap, 0, hasAlpha, transPixel, transferType);
bi = new BufferedImage(cmNew, bi.getRaster(), false, null);
}

View File

@ -35,6 +35,7 @@ import org.apache.poi.common.usermodel.GenericRecord;
import org.apache.poi.hwmf.draw.HwmfDrawProperties;
import org.apache.poi.hwmf.draw.HwmfGraphics;
import org.apache.poi.hwmf.record.HwmfMisc.WmfSetMapMode;
import org.apache.poi.hwmf.usermodel.HwmfCharsetAware;
import org.apache.poi.util.BitField;
import org.apache.poi.util.BitFieldFactory;
import org.apache.poi.util.GenericRecordJsonWriter;
@ -42,6 +43,7 @@ import org.apache.poi.util.GenericRecordUtil;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndianConsts;
import org.apache.poi.util.LittleEndianInputStream;
import org.apache.poi.util.LocaleUtil;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
@ -172,7 +174,7 @@ public class HwmfText {
* The META_TEXTOUT record outputs a character string at the specified location by using the font,
* background color, and text color that are defined in the playback device context.
*/
public static class WmfTextOut implements HwmfRecord {
public static class WmfTextOut implements HwmfRecord, HwmfCharsetAware {
/**
* A 16-bit signed integer that defines the length of the string, in bytes, pointed to by String.
*/
@ -189,6 +191,8 @@ public class HwmfText {
protected Point2D reference = new Point2D.Double();
protected Supplier<Charset> charsetProvider = () -> LocaleUtil.CHARSET_1252;
@Override
public HwmfRecordType getWmfRecordType() {
return HwmfRecordType.textOut;
@ -211,6 +215,7 @@ public class HwmfText {
@Override
public void draw(HwmfGraphics ctx) {
ctx.setCharsetProvider(charsetProvider);
ctx.drawString(getTextBytes(), stringLength, reference);
}
@ -235,6 +240,11 @@ public class HwmfText {
"reference", () -> reference
);
}
/**
 * Replaces the charset provider, which initially supplies Windows-1252.
 * {@code draw} hands the provider to the graphics context before drawing the string.
 *
 * @param provider the supplier of the charset to use
 */
@Override
public void setCharsetProvider(Supplier<Charset> provider) {
charsetProvider = provider;
}
}
@SuppressWarnings("unused")
@ -343,7 +353,7 @@ public class HwmfText {
* are defined in the playback device context. Optionally, dimensions can be provided for clipping,
* opaquing, or both.
*/
public static class WmfExtTextOut implements HwmfRecord {
public static class WmfExtTextOut implements HwmfRecord, HwmfCharsetAware {
/**
* The location, in logical units, where the text string is to be placed.
*/
@ -383,6 +393,8 @@ public class HwmfText {
*/
protected final List<Integer> dx = new ArrayList<>();
protected Supplier<Charset> charsetProvider = () -> LocaleUtil.CHARSET_1252;
public WmfExtTextOut() {
this(new WmfExtTextOutOptions());
}
@ -437,6 +449,7 @@ public class HwmfText {
@Override
public void draw(HwmfGraphics ctx) {
ctx.setCharsetProvider(charsetProvider);
ctx.drawString(rawTextBytes, stringLength, reference, null, bounds, options, dx, false);
}
@ -445,8 +458,7 @@ public class HwmfText {
return "";
}
String ret = new String(rawTextBytes, charset);
return ret.substring(0,
Math.min(ret.length(), stringLength));
return ret.substring(0, Math.min(ret.length(), stringLength));
}
public Point2D getReference() {
@ -468,7 +480,7 @@ public class HwmfText {
private String getGenericText() {
try {
return getText(isUnicode() ? StandardCharsets.UTF_16LE : StandardCharsets.US_ASCII);
return getText(isUnicode() ? StandardCharsets.UTF_16LE : charsetProvider.get());
} catch (IOException e) {
return "";
}
@ -483,6 +495,11 @@ public class HwmfText {
"dx", () -> dx
);
}
/**
* Replaces the charset provider of this record. Until this is called, the provider
* supplies {@link LocaleUtil#CHARSET_1252}. The provider is handed to the graphics
* context in {@link #draw(HwmfGraphics)}.
*
* @param provider the supplier of the charset; stored as-is, no null check is performed
*/
@Override
public void setCharsetProvider(Supplier<Charset> provider) {
charsetProvider = provider;
}
}
public enum HwmfTextAlignment {

View File

@ -0,0 +1,30 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.poi.hwmf.usermodel;
import java.nio.charset.Charset;
import java.util.function.Supplier;
/**
* Helper interface for records whose text would otherwise be decoded with a fixed or
* system-dependent charset. It lets the owner of the record supply a default charset.
* <p>
* The charset is passed as a {@link Supplier} rather than as a value, so the record
* can look it up each time it is needed instead of keeping a copy.
*/
public interface HwmfCharsetAware {
/**
* Sets the supplier from which the record obtains its default charset.
*
* @param provider the supplier of the charset to be used by the record
*/
void setCharsetProvider(Supplier<Charset> provider);
}

View File

@ -24,6 +24,7 @@ import java.awt.geom.Dimension2D;
import java.awt.geom.Rectangle2D;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
@ -46,6 +47,7 @@ import org.apache.poi.hwmf.record.HwmfWindowing.WmfSetWindowOrg;
import org.apache.poi.util.Dimension2DDouble;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndianInputStream;
import org.apache.poi.util.LocaleUtil;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.RecordFormatException;
@ -56,11 +58,13 @@ public class HwmfPicture implements Iterable<HwmfRecord>, GenericRecord {
public static final int MAX_RECORD_LENGTH = 50_000_000;
private static final POILogger logger = POILogFactory.getLogger(HwmfPicture.class);
final List<HwmfRecord> records = new ArrayList<>();
final HwmfPlaceableHeader placeableHeader;
final HwmfHeader header;
/** The default charset */
private Charset defaultCharset = LocaleUtil.CHARSET_1252;
public HwmfPicture(InputStream inputStream) throws IOException {
try (LittleEndianInputStream leis = new LittleEndianInputStream(inputStream)) {
@ -110,6 +114,10 @@ public class HwmfPicture implements Iterable<HwmfRecord>, GenericRecord {
throw new RecordFormatException("Tried to skip "+remainingSize + " but skipped: "+skipped);
}
}
if (wr instanceof HwmfCharsetAware) {
((HwmfCharsetAware)wr).setCharsetProvider(this::getDefaultCharset);
}
}
}
}
@ -126,7 +134,7 @@ public class HwmfPicture implements Iterable<HwmfRecord>, GenericRecord {
Rectangle2D bounds = new Rectangle2D.Double(0,0,width,height);
draw(ctx, bounds);
}
public void draw(Graphics2D ctx, Rectangle2D graphicsBounds) {
HwmfGraphicsState state = new HwmfGraphicsState();
state.backup(ctx);
@ -198,7 +206,7 @@ public class HwmfPicture implements Iterable<HwmfRecord>, GenericRecord {
}
if (wOrg != null && wExt != null) {
return new Rectangle2D.Double(wOrg.getX(), wOrg.getY(), wExt.getSize().getWidth(), wExt.getSize().getHeight());
}
}
}
return null;
}
@ -260,4 +268,12 @@ public class HwmfPicture implements Iterable<HwmfRecord>, GenericRecord {
public List<? extends GenericRecord> getGenericChildren() {
return getRecords();
}
/**
* Sets the default charset of this picture, which is initially
* {@link LocaleUtil#CHARSET_1252}.
* <p>
* Records implementing {@link HwmfCharsetAware} are given {@code this::getDefaultCharset}
* as their charset provider, so a charset set here is what that provider returns
* on subsequent lookups.
*
* @param defaultCharset the new default charset; stored as-is, no null check is performed
*/
public void setDefaultCharset(Charset defaultCharset) {
this.defaultCharset = defaultCharset;
}
/**
* Returns the default charset of this picture.
*
* @return the charset last passed to {@link #setDefaultCharset(Charset)}, or
* {@link LocaleUtil#CHARSET_1252} if it was never changed
*/
public Charset getDefaultCharset() {
return defaultCharset;
}
}

View File

@ -17,10 +17,13 @@
package org.apache.poi.poifs.filesystem;
import org.apache.commons.codec.Charsets;
import org.apache.poi.POIDataSamples;
import org.apache.poi.util.TempFile;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNotSame;
import static org.junit.Assert.assertSame;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.BufferedInputStream;
import java.io.File;
@ -28,32 +31,35 @@ import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Random;
import static org.junit.Assert.*;
import org.apache.poi.POIDataSamples;
import org.apache.poi.util.TempFile;
import org.junit.Test;
public class TestFileMagic {
@Test
public void testFileMagic() {
assertEquals(FileMagic.XML, FileMagic.valueOf("XML"));
assertEquals(FileMagic.XML, FileMagic.valueOf("<?xml".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.XML, FileMagic.valueOf("<?xml".getBytes(StandardCharsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("HTML"));
assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYP".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYPE".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("<html".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("\n\r<html".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("\n<html".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r\n<html".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r<html".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYP".getBytes(StandardCharsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("<!DOCTYPE".getBytes(StandardCharsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("<html".getBytes(StandardCharsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("\n\r<html".getBytes(StandardCharsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("\n<html".getBytes(StandardCharsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r\n<html".getBytes(StandardCharsets.UTF_8)));
assertEquals(FileMagic.HTML, FileMagic.valueOf("\r<html".getBytes(StandardCharsets.UTF_8)));
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xDB }));
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xE0, 'a', 'b', 'J', 'F', 'I', 'F', 0x00, 0x01 }));
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xEE }));
assertEquals(FileMagic.JPEG, FileMagic.valueOf(new byte[]{ (byte)0xFF, (byte)0xD8, (byte)0xFF, (byte)0xE1, 'd', 'c', 'E', 'x', 'i', 'f', 0x00, 0x00 }));
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("something".getBytes(Charsets.UTF_8)));
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf("something".getBytes(StandardCharsets.UTF_8)));
assertEquals(FileMagic.UNKNOWN, FileMagic.valueOf(new byte[0]));
try {

Binary file not shown.