mirror of https://github.com/apache/poi.git
[github-149] improve MAPIMessage.guess7BitEncoding, improve MAPIMessage.getHtmlBody. Thanks to Dominik Hölzl. This closes #149
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1860043 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
3d5671ac5e
commit
f74a8f9abb
|
@ -616,5 +616,473 @@ public final class LocaleUtil {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get default code page from LCID value
|
||||||
|
*
|
||||||
|
* @param lcid the LCID value
|
||||||
|
* @return the default code page
|
||||||
|
*/
|
||||||
|
public static int getDefaultCodePageFromLCID(int lcid) {
|
||||||
|
int languageId = lcid & 0xFFFF;
|
||||||
|
switch (languageId) {
|
||||||
|
case 0x0001: return 1256;
|
||||||
|
case 0x0002: return 1251;
|
||||||
|
case 0x0003: return 1252;
|
||||||
|
case 0x0004: return 936;
|
||||||
|
case 0x0005: return 1250;
|
||||||
|
case 0x0006: return 1252;
|
||||||
|
case 0x0007: return 1252;
|
||||||
|
case 0x0008: return 1253;
|
||||||
|
case 0x0009: return 1252;
|
||||||
|
case 0x000a: return 1252;
|
||||||
|
case 0x000b: return 1252;
|
||||||
|
case 0x000c: return 1252;
|
||||||
|
case 0x000d: return 1255;
|
||||||
|
case 0x000e: return 1250;
|
||||||
|
case 0x000f: return 1252;
|
||||||
|
case 0x0010: return 1252;
|
||||||
|
case 0x0011: return 932;
|
||||||
|
case 0x0012: return 949;
|
||||||
|
case 0x0013: return 1252;
|
||||||
|
case 0x0014: return 1252;
|
||||||
|
case 0x0015: return 1250;
|
||||||
|
case 0x0016: return 1252;
|
||||||
|
case 0x0017: return 1252;
|
||||||
|
case 0x0018: return 1250;
|
||||||
|
case 0x0019: return 1251;
|
||||||
|
case 0x001a: return 1250;
|
||||||
|
case 0x001b: return 1250;
|
||||||
|
case 0x001c: return 1250;
|
||||||
|
case 0x001d: return 1252;
|
||||||
|
case 0x001e: return 874;
|
||||||
|
case 0x001f: return 1254;
|
||||||
|
case 0x0020: return 1256;
|
||||||
|
case 0x0021: return 1252;
|
||||||
|
case 0x0022: return 1251;
|
||||||
|
case 0x0023: return 1251;
|
||||||
|
case 0x0024: return 1250;
|
||||||
|
case 0x0025: return 1257;
|
||||||
|
case 0x0026: return 1257;
|
||||||
|
case 0x0027: return 1257;
|
||||||
|
case 0x0028: return 1251;
|
||||||
|
case 0x0029: return 1256;
|
||||||
|
case 0x002a: return 1258;
|
||||||
|
case 0x002b: return 0;
|
||||||
|
case 0x002c: return 1254;
|
||||||
|
case 0x002d: return 1252;
|
||||||
|
case 0x002e: return 1252;
|
||||||
|
case 0x002f: return 1251;
|
||||||
|
case 0x0030: return 0;
|
||||||
|
case 0x0031: return 0;
|
||||||
|
case 0x0032: return 1252;
|
||||||
|
case 0x0033: return 32759;
|
||||||
|
case 0x0034: return 1252;
|
||||||
|
case 0x0035: return 1252;
|
||||||
|
case 0x0036: return 1252;
|
||||||
|
case 0x0037: return 0;
|
||||||
|
case 0x0038: return 1252;
|
||||||
|
case 0x0039: return 0;
|
||||||
|
case 0x003a: return 0;
|
||||||
|
case 0x003b: return 1252;
|
||||||
|
case 0x003c: return 1252;
|
||||||
|
case 0x003d: return 32759;
|
||||||
|
case 0x003e: return 1252;
|
||||||
|
case 0x003f: return 0;
|
||||||
|
case 0x0040: return 1251;
|
||||||
|
case 0x0041: return 1252;
|
||||||
|
case 0x0042: return 1250;
|
||||||
|
case 0x0043: return 1254;
|
||||||
|
case 0x0044: return 1251;
|
||||||
|
case 0x0045: return 0;
|
||||||
|
case 0x0046: return 0;
|
||||||
|
case 0x0047: return 0;
|
||||||
|
case 0x0048: return 0;
|
||||||
|
case 0x0049: return 0;
|
||||||
|
case 0x004a: return 0;
|
||||||
|
case 0x004b: return 0;
|
||||||
|
case 0x004c: return 0;
|
||||||
|
case 0x004d: return 0;
|
||||||
|
case 0x004e: return 0;
|
||||||
|
case 0x004f: return 0;
|
||||||
|
case 0x0050: return 1251;
|
||||||
|
case 0x0051: return 0;
|
||||||
|
case 0x0052: return 1252;
|
||||||
|
case 0x0053: return 0;
|
||||||
|
case 0x0054: return 0;
|
||||||
|
case 0x0055: return 0;
|
||||||
|
case 0x0056: return 1252;
|
||||||
|
case 0x0057: return 0;
|
||||||
|
case 0x0058: return 32759;
|
||||||
|
case 0x0059: return 1256;
|
||||||
|
case 0x005a: return 0;
|
||||||
|
case 0x005b: return 0;
|
||||||
|
case 0x005c: return 0;
|
||||||
|
case 0x005d: return 1252;
|
||||||
|
case 0x005e: return 0;
|
||||||
|
case 0x005f: return 1252;
|
||||||
|
case 0x0060: return 32759;
|
||||||
|
case 0x0061: return 0;
|
||||||
|
case 0x0062: return 1252;
|
||||||
|
case 0x0063: return 0;
|
||||||
|
case 0x0064: return 1252;
|
||||||
|
case 0x0065: return 0;
|
||||||
|
case 0x0066: return 32759;
|
||||||
|
case 0x0067: return 1252;
|
||||||
|
case 0x0068: return 1252;
|
||||||
|
case 0x0069: return 32759;
|
||||||
|
case 0x006a: return 1252;
|
||||||
|
case 0x006b: return 1252;
|
||||||
|
case 0x006c: return 1252;
|
||||||
|
case 0x006d: return 1251;
|
||||||
|
case 0x006e: return 1252;
|
||||||
|
case 0x006f: return 1252;
|
||||||
|
case 0x0070: return 1252;
|
||||||
|
case 0x0071: return 32759;
|
||||||
|
case 0x0072: return 0;
|
||||||
|
case 0x0073: return 0;
|
||||||
|
case 0x0074: return 1252;
|
||||||
|
case 0x0075: return 1252;
|
||||||
|
case 0x0076: return 32759;
|
||||||
|
case 0x0077: return 0;
|
||||||
|
case 0x0078: return 0;
|
||||||
|
case 0x0079: return 32759;
|
||||||
|
case 0x007a: return 1252;
|
||||||
|
case 0x007b: return 32759;
|
||||||
|
case 0x007c: return 1252;
|
||||||
|
case 0x007d: return 32759;
|
||||||
|
case 0x007e: return 1252;
|
||||||
|
case 0x007f: return 1252;
|
||||||
|
case 0x0080: return 1256;
|
||||||
|
case 0x0081: return 0;
|
||||||
|
case 0x0082: return 1252;
|
||||||
|
case 0x0083: return 1252;
|
||||||
|
case 0x0084: return 1252;
|
||||||
|
case 0x0085: return 1251;
|
||||||
|
case 0x0086: return 1252;
|
||||||
|
case 0x0087: return 1252;
|
||||||
|
case 0x0088: return 1252;
|
||||||
|
case 0x0089: return 32759;
|
||||||
|
case 0x008a: return 32759;
|
||||||
|
case 0x008b: return 32759;
|
||||||
|
case 0x008c: return 1256;
|
||||||
|
case 0x008d: return 32759;
|
||||||
|
case 0x008e: return 32759;
|
||||||
|
case 0x008f: return 32759;
|
||||||
|
case 0x0090: return 32759;
|
||||||
|
case 0x0091: return 1252;
|
||||||
|
case 0x0092: return 1256;
|
||||||
|
case 0x0093: return 32759;
|
||||||
|
case 0x0401: return 1256;
|
||||||
|
case 0x0402: return 1251;
|
||||||
|
case 0x0403: return 1252;
|
||||||
|
case 0x0404: return 950;
|
||||||
|
case 0x0405: return 1250;
|
||||||
|
case 0x0406: return 1252;
|
||||||
|
case 0x0407: return 1252;
|
||||||
|
case 0x0408: return 1253;
|
||||||
|
case 0x0409: return 1252;
|
||||||
|
case 0x040a: return 1252;
|
||||||
|
case 0x040b: return 1252;
|
||||||
|
case 0x040c: return 1252;
|
||||||
|
case 0x040d: return 1255;
|
||||||
|
case 0x040e: return 1250;
|
||||||
|
case 0x040f: return 1252;
|
||||||
|
case 0x0410: return 1252;
|
||||||
|
case 0x0411: return 932;
|
||||||
|
case 0x0412: return 949;
|
||||||
|
case 0x0413: return 1252;
|
||||||
|
case 0x0414: return 1252;
|
||||||
|
case 0x0415: return 1250;
|
||||||
|
case 0x0416: return 1252;
|
||||||
|
case 0x0417: return 1252;
|
||||||
|
case 0x0418: return 1250;
|
||||||
|
case 0x0419: return 1251;
|
||||||
|
case 0x041a: return 1250;
|
||||||
|
case 0x041b: return 1250;
|
||||||
|
case 0x041c: return 1250;
|
||||||
|
case 0x041d: return 1252;
|
||||||
|
case 0x041e: return 874;
|
||||||
|
case 0x041f: return 1254;
|
||||||
|
case 0x0420: return 1256;
|
||||||
|
case 0x0421: return 1252;
|
||||||
|
case 0x0422: return 1251;
|
||||||
|
case 0x0423: return 1251;
|
||||||
|
case 0x0424: return 1250;
|
||||||
|
case 0x0425: return 1257;
|
||||||
|
case 0x0426: return 1257;
|
||||||
|
case 0x0427: return 1257;
|
||||||
|
case 0x0428: return 1251;
|
||||||
|
case 0x0429: return 1256;
|
||||||
|
case 0x042a: return 1258;
|
||||||
|
case 0x042b: return 0;
|
||||||
|
case 0x042c: return 1254;
|
||||||
|
case 0x042d: return 1252;
|
||||||
|
case 0x042e: return 1252;
|
||||||
|
case 0x042f: return 1251;
|
||||||
|
case 0x0430: return 0;
|
||||||
|
case 0x0431: return 0;
|
||||||
|
case 0x0432: return 1252;
|
||||||
|
case 0x0433: return 32759;
|
||||||
|
case 0x0434: return 1252;
|
||||||
|
case 0x0435: return 1252;
|
||||||
|
case 0x0436: return 1252;
|
||||||
|
case 0x0437: return 0;
|
||||||
|
case 0x0438: return 1252;
|
||||||
|
case 0x0439: return 0;
|
||||||
|
case 0x043a: return 0;
|
||||||
|
case 0x043b: return 1252;
|
||||||
|
case 0x043d: return 32759;
|
||||||
|
case 0x043e: return 1252;
|
||||||
|
case 0x043f: return 0;
|
||||||
|
case 0x0440: return 1251;
|
||||||
|
case 0x0441: return 1252;
|
||||||
|
case 0x0442: return 1250;
|
||||||
|
case 0x0443: return 1254;
|
||||||
|
case 0x0444: return 1251;
|
||||||
|
case 0x0445: return 0;
|
||||||
|
case 0x0446: return 0;
|
||||||
|
case 0x0447: return 0;
|
||||||
|
case 0x0448: return 0;
|
||||||
|
case 0x0449: return 0;
|
||||||
|
case 0x044a: return 0;
|
||||||
|
case 0x044b: return 0;
|
||||||
|
case 0x044c: return 0;
|
||||||
|
case 0x044d: return 0;
|
||||||
|
case 0x044e: return 0;
|
||||||
|
case 0x044f: return 0;
|
||||||
|
case 0x0450: return 1251;
|
||||||
|
case 0x0451: return 0;
|
||||||
|
case 0x0452: return 1252;
|
||||||
|
case 0x0453: return 0;
|
||||||
|
case 0x0454: return 0;
|
||||||
|
case 0x0455: return 0;
|
||||||
|
case 0x0456: return 1252;
|
||||||
|
case 0x0457: return 0;
|
||||||
|
case 0x0458: return 32759;
|
||||||
|
case 0x0459: return 32759;
|
||||||
|
case 0x045a: return 0;
|
||||||
|
case 0x045b: return 0;
|
||||||
|
case 0x045c: return 0;
|
||||||
|
case 0x045d: return 0;
|
||||||
|
case 0x045e: return 0;
|
||||||
|
case 0x045f: return 32759;
|
||||||
|
case 0x0460: return 32759;
|
||||||
|
case 0x0461: return 0;
|
||||||
|
case 0x0462: return 1252;
|
||||||
|
case 0x0463: return 0;
|
||||||
|
case 0x0464: return 1252;
|
||||||
|
case 0x0465: return 0;
|
||||||
|
case 0x0466: return 32759;
|
||||||
|
case 0x0467: return 32759;
|
||||||
|
case 0x0468: return 1252;
|
||||||
|
case 0x0469: return 32759;
|
||||||
|
case 0x046a: return 1252;
|
||||||
|
case 0x046b: return 1252;
|
||||||
|
case 0x046c: return 1252;
|
||||||
|
case 0x046d: return 1251;
|
||||||
|
case 0x046e: return 1252;
|
||||||
|
case 0x046f: return 1252;
|
||||||
|
case 0x0470: return 1252;
|
||||||
|
case 0x0471: return 32759;
|
||||||
|
case 0x0472: return 0;
|
||||||
|
case 0x0473: return 0;
|
||||||
|
case 0x0474: return 1252;
|
||||||
|
case 0x0475: return 1252;
|
||||||
|
case 0x0476: return 32759;
|
||||||
|
case 0x0477: return 0;
|
||||||
|
case 0x0478: return 0;
|
||||||
|
case 0x0479: return 32759;
|
||||||
|
case 0x047a: return 1252;
|
||||||
|
case 0x047c: return 1252;
|
||||||
|
case 0x047e: return 1252;
|
||||||
|
case 0x0480: return 1256;
|
||||||
|
case 0x0481: return 0;
|
||||||
|
case 0x0482: return 1252;
|
||||||
|
case 0x0483: return 1252;
|
||||||
|
case 0x0484: return 1252;
|
||||||
|
case 0x0485: return 1251;
|
||||||
|
case 0x0486: return 1252;
|
||||||
|
case 0x0487: return 1252;
|
||||||
|
case 0x0488: return 1252;
|
||||||
|
case 0x048c: return 1256;
|
||||||
|
case 0x048d: return 32759;
|
||||||
|
case 0x048e: return 32759;
|
||||||
|
case 0x048f: return 32759;
|
||||||
|
case 0x0490: return 32759;
|
||||||
|
case 0x0491: return 1252;
|
||||||
|
case 0x0492: return 1256;
|
||||||
|
case 0x0493: return 32759;
|
||||||
|
case 0x0501: return 1250;
|
||||||
|
case 0x05fe: return 932;
|
||||||
|
case 0x0801: return 1256;
|
||||||
|
case 0x0803: return 1252;
|
||||||
|
case 0x0804: return 936;
|
||||||
|
case 0x0807: return 1252;
|
||||||
|
case 0x0809: return 1252;
|
||||||
|
case 0x080a: return 1252;
|
||||||
|
case 0x080c: return 1252;
|
||||||
|
case 0x0810: return 1252;
|
||||||
|
case 0x0811: return 32759;
|
||||||
|
case 0x0813: return 1252;
|
||||||
|
case 0x0814: return 1252;
|
||||||
|
case 0x0816: return 1252;
|
||||||
|
case 0x0818: return 0;
|
||||||
|
case 0x0819: return 32759;
|
||||||
|
case 0x081a: return 1250;
|
||||||
|
case 0x081d: return 1252;
|
||||||
|
case 0x0820: return 0;
|
||||||
|
case 0x0827: return 32759;
|
||||||
|
case 0x082c: return 1251;
|
||||||
|
case 0x082e: return 1252;
|
||||||
|
case 0x0832: return 1252;
|
||||||
|
case 0x083b: return 1252;
|
||||||
|
case 0x083c: return 1252;
|
||||||
|
case 0x083e: return 1252;
|
||||||
|
case 0x0843: return 1251;
|
||||||
|
case 0x0845: return 0;
|
||||||
|
case 0x0846: return 1256;
|
||||||
|
case 0x0849: return 0;
|
||||||
|
case 0x0850: return 0;
|
||||||
|
case 0x0851: return 32759;
|
||||||
|
case 0x0859: return 1256;
|
||||||
|
case 0x085d: return 1252;
|
||||||
|
case 0x085f: return 1252;
|
||||||
|
case 0x0860: return 32759;
|
||||||
|
case 0x0861: return 0;
|
||||||
|
case 0x0867: return 1252;
|
||||||
|
case 0x086b: return 1252;
|
||||||
|
case 0x0873: return 0;
|
||||||
|
case 0x09ff: return 1256;
|
||||||
|
case 0x0c01: return 1256;
|
||||||
|
case 0x0c04: return 950;
|
||||||
|
case 0x0c07: return 1252;
|
||||||
|
case 0x0c09: return 1252;
|
||||||
|
case 0x0c0a: return 1252;
|
||||||
|
case 0x0c0c: return 1252;
|
||||||
|
case 0x0c1a: return 1251;
|
||||||
|
case 0x0c3b: return 1252;
|
||||||
|
case 0x0c5f: return 32759;
|
||||||
|
case 0x0c6b: return 1252;
|
||||||
|
case 0x1001: return 1256;
|
||||||
|
case 0x1004: return 936;
|
||||||
|
case 0x1007: return 1252;
|
||||||
|
case 0x1009: return 1252;
|
||||||
|
case 0x100a: return 1252;
|
||||||
|
case 0x100c: return 1252;
|
||||||
|
case 0x101a: return 1250;
|
||||||
|
case 0x103b: return 1252;
|
||||||
|
case 0x1401: return 1256;
|
||||||
|
case 0x1404: return 950;
|
||||||
|
case 0x1407: return 1252;
|
||||||
|
case 0x1409: return 1252;
|
||||||
|
case 0x140a: return 1252;
|
||||||
|
case 0x140c: return 1252;
|
||||||
|
case 0x141a: return 1250;
|
||||||
|
case 0x143b: return 1252;
|
||||||
|
case 0x1801: return 1256;
|
||||||
|
case 0x1809: return 1252;
|
||||||
|
case 0x180a: return 1252;
|
||||||
|
case 0x180c: return 1252;
|
||||||
|
case 0x181a: return 1250;
|
||||||
|
case 0x183b: return 1252;
|
||||||
|
case 0x1c01: return 1256;
|
||||||
|
case 0x1c09: return 1252;
|
||||||
|
case 0x1c0a: return 1252;
|
||||||
|
case 0x1c0c: return 32759;
|
||||||
|
case 0x1c1a: return 1251;
|
||||||
|
case 0x1c3b: return 1252;
|
||||||
|
case 0x2001: return 1256;
|
||||||
|
case 0x2008: return 32759;
|
||||||
|
case 0x2009: return 1252;
|
||||||
|
case 0x200a: return 1252;
|
||||||
|
case 0x200c: return 0;
|
||||||
|
case 0x201a: return 1251;
|
||||||
|
case 0x203b: return 1252;
|
||||||
|
case 0x2401: return 1256;
|
||||||
|
case 0x2409: return 1252;
|
||||||
|
case 0x240a: return 1252;
|
||||||
|
case 0x240c: return 0;
|
||||||
|
case 0x241a: return 1250;
|
||||||
|
case 0x243b: return 1252;
|
||||||
|
case 0x2801: return 1256;
|
||||||
|
case 0x2809: return 1252;
|
||||||
|
case 0x280a: return 1252;
|
||||||
|
case 0x280c: return 0;
|
||||||
|
case 0x281a: return 1251;
|
||||||
|
case 0x2c01: return 1256;
|
||||||
|
case 0x2c09: return 1252;
|
||||||
|
case 0x2c0a: return 1252;
|
||||||
|
case 0x2c0c: return 0;
|
||||||
|
case 0x2c1a: return 1250;
|
||||||
|
case 0x3001: return 1256;
|
||||||
|
case 0x3009: return 1252;
|
||||||
|
case 0x300a: return 1252;
|
||||||
|
case 0x300c: return 0;
|
||||||
|
case 0x301a: return 1251;
|
||||||
|
case 0x3401: return 1256;
|
||||||
|
case 0x3409: return 1252;
|
||||||
|
case 0x340a: return 1252;
|
||||||
|
case 0x340c: return 0;
|
||||||
|
case 0x3801: return 1256;
|
||||||
|
case 0x3809: return 32759;
|
||||||
|
case 0x380a: return 1252;
|
||||||
|
case 0x380c: return 0;
|
||||||
|
case 0x3c01: return 1256;
|
||||||
|
case 0x3c09: return 0;
|
||||||
|
case 0x3c0a: return 1252;
|
||||||
|
case 0x3c0c: return 0;
|
||||||
|
case 0x4001: return 1256;
|
||||||
|
case 0x4009: return 1252;
|
||||||
|
case 0x400a: return 1252;
|
||||||
|
case 0x4401: return 32759;
|
||||||
|
case 0x4409: return 1252;
|
||||||
|
case 0x440a: return 1252;
|
||||||
|
case 0x4801: return 32759;
|
||||||
|
case 0x4809: return 1252;
|
||||||
|
case 0x480a: return 1252;
|
||||||
|
case 0x4c09: return 32759;
|
||||||
|
case 0x4c0a: return 1252;
|
||||||
|
case 0x5009: return 32759;
|
||||||
|
case 0x500a: return 1252;
|
||||||
|
case 0x5409: return 32759;
|
||||||
|
case 0x540a: return 1252;
|
||||||
|
case 0x5809: return 32759;
|
||||||
|
case 0x5c09: return 32759;
|
||||||
|
case 0x6009: return 32759;
|
||||||
|
case 0x6409: return 32759;
|
||||||
|
case 0x641a: return 1251;
|
||||||
|
case 0x681a: return 1250;
|
||||||
|
case 0x6c1a: return 1251;
|
||||||
|
case 0x701a: return 1250;
|
||||||
|
case 0x703b: return 1252;
|
||||||
|
case 0x742c: return 1251;
|
||||||
|
case 0x743b: return 1252;
|
||||||
|
case 0x7804: return 936;
|
||||||
|
case 0x7814: return 1252;
|
||||||
|
case 0x781a: return 1250;
|
||||||
|
case 0x782c: return 1254;
|
||||||
|
case 0x783b: return 1252;
|
||||||
|
case 0x7843: return 1251;
|
||||||
|
case 0x7850: return 1251;
|
||||||
|
case 0x785d: return 0;
|
||||||
|
case 0x7c04: return 950;
|
||||||
|
case 0x7c14: return 1252;
|
||||||
|
case 0x7c1a: return 1250;
|
||||||
|
case 0x7c28: return 1251;
|
||||||
|
case 0x7c2e: return 1252;
|
||||||
|
case 0x7c3b: return 1252;
|
||||||
|
case 0x7c43: return 1254;
|
||||||
|
case 0x7c46: return 1256;
|
||||||
|
case 0x7c50: return 0;
|
||||||
|
case 0x7c59: return 1256;
|
||||||
|
case 0x7c5c: return 0;
|
||||||
|
case 0x7c5d: return 1252;
|
||||||
|
case 0x7c5f: return 1252;
|
||||||
|
case 0x7c67: return 1252;
|
||||||
|
case 0x7c68: return 1252;
|
||||||
|
case 0x7c92: return 1256;
|
||||||
|
default: return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -50,6 +50,7 @@ import org.apache.poi.hsmf.parsers.POIFSChunkParser;
|
||||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.util.CodePageUtil;
|
import org.apache.poi.util.CodePageUtil;
|
||||||
|
import org.apache.poi.util.LocaleUtil;
|
||||||
import org.apache.poi.util.POILogFactory;
|
import org.apache.poi.util.POILogFactory;
|
||||||
import org.apache.poi.util.POILogger;
|
import org.apache.poi.util.POILogger;
|
||||||
|
|
||||||
|
@ -210,8 +211,21 @@ public class MAPIMessage extends POIReadOnlyDocument {
|
||||||
* returnNullOnMissingChunk is set
|
* returnNullOnMissingChunk is set
|
||||||
*/
|
*/
|
||||||
public String getHtmlBody() throws ChunkNotFoundException {
|
public String getHtmlBody() throws ChunkNotFoundException {
|
||||||
if(mainChunks.getHtmlBodyChunkBinary() != null) {
|
ByteChunk htmlBodyBinaryChunk = mainChunks.getHtmlBodyChunkBinary();
|
||||||
return mainChunks.getHtmlBodyChunkBinary().getAs7bitString();
|
if (htmlBodyBinaryChunk != null) {
|
||||||
|
List<PropertyValue> cpid = mainChunks.getProperties().get(MAPIProperty.INTERNET_CPID);
|
||||||
|
if (cpid != null && cpid.size() > 0) {
|
||||||
|
int codepage = ((LongPropertyValue) cpid.get(0)).getValue();
|
||||||
|
try {
|
||||||
|
String encoding = CodePageUtil.codepageToEncoding(codepage, true);
|
||||||
|
byte[] htmlBodyBinary = htmlBodyBinaryChunk.getValue();
|
||||||
|
return new String(htmlBodyBinary, encoding);
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
logger.log(POILogger.WARN, "HTML body binary: Invalid codepage ID ", codepage, " set for the message via ",
|
||||||
|
MAPIProperty.INTERNET_CPID, ", ignoring");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return htmlBodyBinaryChunk.getAs7bitString();
|
||||||
}
|
}
|
||||||
return getStringFromChunk(mainChunks.getHtmlBodyChunkString());
|
return getStringFromChunk(mainChunks.getHtmlBodyChunkString());
|
||||||
}
|
}
|
||||||
|
@ -391,67 +405,86 @@ public class MAPIMessage extends POIReadOnlyDocument {
|
||||||
* <p>Bug #49441 has more on why this is needed</p>
|
* <p>Bug #49441 has more on why this is needed</p>
|
||||||
*/
|
*/
|
||||||
public void guess7BitEncoding() {
|
public void guess7BitEncoding() {
|
||||||
// First choice is a codepage property
|
String generalcodepage = null;
|
||||||
for (MAPIProperty prop : new MAPIProperty[] {
|
String htmlbodycodepage = null;
|
||||||
MAPIProperty.MESSAGE_CODEPAGE,
|
String bodycodepage = null;
|
||||||
MAPIProperty.INTERNET_CPID
|
//
|
||||||
}) {
|
// General codepage: Message codepage property.
|
||||||
List<PropertyValue> val = mainChunks.getProperties().get(prop);
|
//
|
||||||
if (val != null && val.size() > 0) {
|
List<PropertyValue> val = mainChunks.getProperties().get(MAPIProperty.MESSAGE_CODEPAGE);
|
||||||
int codepage = ((LongPropertyValue)val.get(0)).getValue();
|
if (val != null && val.size() > 0) {
|
||||||
try {
|
int codepage = ((LongPropertyValue) val.get(0)).getValue();
|
||||||
String encoding = CodePageUtil.codepageToEncoding(codepage, true);
|
try {
|
||||||
set7BitEncoding(encoding);
|
String encoding = CodePageUtil.codepageToEncoding(codepage, true);
|
||||||
return;
|
generalcodepage = encoding;
|
||||||
} catch(UnsupportedEncodingException e) {
|
} catch (UnsupportedEncodingException e) {
|
||||||
logger.log(POILogger.WARN, "Invalid codepage ID ", codepage,
|
logger.log(POILogger.WARN, "Invalid codepage ID ", codepage, " set for the message via ",
|
||||||
" set for the message via ", prop, ", ignoring");
|
MAPIProperty.MESSAGE_CODEPAGE, ", ignoring");
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
//
|
||||||
|
// General codepage fallback: Message locale ID property.
|
||||||
|
//
|
||||||
|
if (generalcodepage == null) {
|
||||||
|
val = mainChunks.getProperties().get(MAPIProperty.MESSAGE_LOCALE_ID);
|
||||||
|
if (val != null && val.size() > 0) {
|
||||||
|
int lcid = ((LongPropertyValue) val.get(0)).getValue();
|
||||||
|
int codepage = LocaleUtil.getDefaultCodePageFromLCID(lcid);
|
||||||
|
try {
|
||||||
|
if (codepage != 0) {
|
||||||
|
String encoding = CodePageUtil.codepageToEncoding(codepage, true);
|
||||||
|
generalcodepage = encoding;
|
||||||
|
}
|
||||||
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
logger.log(POILogger.WARN, "Invalid codepage ID ", codepage, "from locale ID", lcid, " set for the message via ",
|
||||||
|
MAPIProperty.MESSAGE_LOCALE_ID, ", ignoring");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
//
|
||||||
// Second choice is a charset on a content type header
|
// General codepage fallback: Charset on a content type header.
|
||||||
try {
|
//
|
||||||
|
if (generalcodepage == null) {
|
||||||
|
try {
|
||||||
String[] headers = getHeaders();
|
String[] headers = getHeaders();
|
||||||
if(headers != null && headers.length > 0) {
|
if (headers != null && headers.length > 0) {
|
||||||
// Look for a content type with a charset
|
Pattern p = Pattern.compile("content-type:.*?charset=[\"']?([^;'\"]+)[\"']?", Pattern.CASE_INSENSITIVE);
|
||||||
Pattern p = Pattern.compile("Content-Type:.*?charset=[\"']?([^;'\"]+)[\"']?", Pattern.CASE_INSENSITIVE);
|
for (String header : headers) {
|
||||||
|
if (header.toLowerCase().startsWith("content-type")) {
|
||||||
for(String header : headers) {
|
Matcher m = p.matcher(header);
|
||||||
if(header.startsWith("Content-Type")) {
|
if (m.matches()) {
|
||||||
Matcher m = p.matcher(header);
|
String encoding = m.group(1);
|
||||||
if(m.matches()) {
|
generalcodepage = encoding;
|
||||||
// Found it! Tell all the string chunks
|
|
||||||
String charset = m.group(1);
|
|
||||||
|
|
||||||
if (!charset.equalsIgnoreCase("utf-8")) {
|
|
||||||
set7BitEncoding(charset);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch(ChunkNotFoundException e) {}
|
} catch (ChunkNotFoundException e) {
|
||||||
|
}
|
||||||
// Nothing suitable in the headers, try HTML
|
}
|
||||||
try {
|
//
|
||||||
String html = getHtmlBody();
|
// HTML and text body encoding: Internet CPID property.
|
||||||
if(html != null && html.length() > 0) {
|
// UTF-8 is ignored for text body. This seems to be a special Outlook behavior.
|
||||||
// Look for a content type in the meta headers
|
//
|
||||||
Pattern p = Pattern.compile(
|
val = mainChunks.getProperties().get(MAPIProperty.INTERNET_CPID);
|
||||||
"<META\\s+HTTP-EQUIV=\"Content-Type\"\\s+CONTENT=\"text/html;\\s+charset=(.*?)\""
|
if (val != null && val.size() > 0) {
|
||||||
);
|
int codepage = ((LongPropertyValue) val.get(0)).getValue();
|
||||||
Matcher m = p.matcher(html);
|
try {
|
||||||
if(m.find()) {
|
String encoding = CodePageUtil.codepageToEncoding(codepage, true);
|
||||||
// Found it! Tell all the string chunks
|
htmlbodycodepage = encoding;
|
||||||
String charset = m.group(1);
|
if (!encoding.equalsIgnoreCase("utf-8")) {
|
||||||
set7BitEncoding(charset);
|
bodycodepage = encoding;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch(ChunkNotFoundException e) {}
|
} catch (UnsupportedEncodingException e) {
|
||||||
}
|
logger.log(POILogger.WARN, "Invalid codepage ID ", codepage, " set for the message via ",
|
||||||
|
MAPIProperty.INTERNET_CPID, ", ignoring");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//
|
||||||
|
// Apply encoding
|
||||||
|
//
|
||||||
|
set7BitEncoding(generalcodepage, htmlbodycodepage, bodycodepage);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Many messages store their strings as unicode, which is
|
* Many messages store their strings as unicode, which is
|
||||||
|
@ -464,26 +497,41 @@ public class MAPIMessage extends POIReadOnlyDocument {
|
||||||
* @see #guess7BitEncoding()
|
* @see #guess7BitEncoding()
|
||||||
*/
|
*/
|
||||||
public void set7BitEncoding(String charset) {
|
public void set7BitEncoding(String charset) {
|
||||||
|
set7BitEncoding(charset, charset, charset);
|
||||||
|
}
|
||||||
|
public void set7BitEncoding(String generalcharset, String htmlbodycharset, String bodycharset) {
|
||||||
for(Chunk c : mainChunks.getChunks()) {
|
for(Chunk c : mainChunks.getChunks()) {
|
||||||
if(c instanceof StringChunk) {
|
if(c instanceof StringChunk) {
|
||||||
((StringChunk)c).set7BitEncoding(charset);
|
if (c.getChunkId() == MAPIProperty.BODY_HTML.id) {
|
||||||
|
if (htmlbodycharset != null) {
|
||||||
|
((StringChunk)c).set7BitEncoding(htmlbodycharset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (c.getChunkId() == MAPIProperty.BODY.id) {
|
||||||
|
if (bodycharset != null) {
|
||||||
|
((StringChunk)c).set7BitEncoding(bodycharset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (generalcharset != null) {
|
||||||
|
((StringChunk)c).set7BitEncoding(generalcharset);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (generalcharset != null) {
|
||||||
if (nameIdChunks!=null) {
|
if (nameIdChunks!=null) {
|
||||||
for(Chunk c : nameIdChunks.getChunks()) {
|
for(Chunk c : nameIdChunks.getChunks()) {
|
||||||
if(c instanceof StringChunk) {
|
if(c instanceof StringChunk) {
|
||||||
((StringChunk)c).set7BitEncoding(charset);
|
((StringChunk)c).set7BitEncoding(generalcharset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for(RecipientChunks rc : recipientChunks) {
|
||||||
for(RecipientChunks rc : recipientChunks) {
|
for(Chunk c : rc.getAll()) {
|
||||||
for(Chunk c : rc.getAll()) {
|
if(c instanceof StringChunk) {
|
||||||
if(c instanceof StringChunk) {
|
((StringChunk)c).set7BitEncoding(generalcharset);
|
||||||
((StringChunk)c).set7BitEncoding(charset);
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -512,6 +512,8 @@ public class MAPIProperty {
|
||||||
new MAPIProperty(0x1a, ASCII_STRING, "MessageClass", "PR_MESSAGE_CLASS");
|
new MAPIProperty(0x1a, ASCII_STRING, "MessageClass", "PR_MESSAGE_CLASS");
|
||||||
public static final MAPIProperty MESSAGE_CODEPAGE =
|
public static final MAPIProperty MESSAGE_CODEPAGE =
|
||||||
new MAPIProperty(0x3ffd, Types.LONG, "MessageCodepage", "PR_MESSAGE_CODEPAGE");
|
new MAPIProperty(0x3ffd, Types.LONG, "MessageCodepage", "PR_MESSAGE_CODEPAGE");
|
||||||
|
public static final MAPIProperty MESSAGE_LOCALE_ID =
|
||||||
|
new MAPIProperty(0x3ff1, Types.LONG, "MessageLocaleId", "PR_MESSAGE_LOCALE_ID");
|
||||||
public static final MAPIProperty MESSAGE_DELIVERY_ID =
|
public static final MAPIProperty MESSAGE_DELIVERY_ID =
|
||||||
new MAPIProperty(0x1b, BINARY, "MessageDeliveryId", "PR_MESSAGE_DELIVERY_ID");
|
new MAPIProperty(0x1b, BINARY, "MessageDeliveryId", "PR_MESSAGE_DELIVERY_ID");
|
||||||
public static final MAPIProperty MESSAGE_DELIVERY_TIME =
|
public static final MAPIProperty MESSAGE_DELIVERY_TIME =
|
||||||
|
|
|
@ -39,7 +39,8 @@ import org.junit.runners.Suite;
|
||||||
TestPOIFSChunkParser.class,
|
TestPOIFSChunkParser.class,
|
||||||
TestMessageSubmissionChunkY2KRead.class,
|
TestMessageSubmissionChunkY2KRead.class,
|
||||||
TestMessageSubmissionChunk.class,
|
TestMessageSubmissionChunk.class,
|
||||||
TestExtractEmbeddedMSG.class
|
TestExtractEmbeddedMSG.class,
|
||||||
|
Test7BitCodepage.class
|
||||||
})
|
})
|
||||||
public class AllHSMFTests {
|
public class AllHSMFTests {
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
package org.apache.poi.hsmf;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.poi.POIDataSamples;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests to verify if code page for general properties like subject,
|
||||||
|
* text body and html body is evaluated correctly.
|
||||||
|
*/
|
||||||
|
public final class Test7BitCodepage extends TestCase {
|
||||||
|
private final MAPIMessage ascii_cp1251_lcid1049;
|
||||||
|
private final MAPIMessage ascii_utf_8_cp1252_lcid1031;
|
||||||
|
private final MAPIMessage ascii_utf_8_cp1252_lcid1031_html;
|
||||||
|
private final MAPIMessage htmlbodybinary_cp1251;
|
||||||
|
private final MAPIMessage htmlbodybinary_utf_8;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize this test, load up the messages.
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
public Test7BitCodepage() throws IOException {
|
||||||
|
POIDataSamples samples = POIDataSamples.getHSMFInstance();
|
||||||
|
ascii_cp1251_lcid1049 = new MAPIMessage(samples.openResourceAsStream("ASCII_CP1251_LCID1049.msg"));
|
||||||
|
ascii_utf_8_cp1252_lcid1031 = new MAPIMessage(samples.openResourceAsStream("ASCII_UTF-8_CP1252_LCID1031.msg"));
|
||||||
|
ascii_utf_8_cp1252_lcid1031_html = new MAPIMessage(samples.openResourceAsStream("ASCII_UTF-8_CP1252_LCID1031_HTML.msg"));
|
||||||
|
htmlbodybinary_cp1251 = new MAPIMessage(samples.openResourceAsStream("HTMLBodyBinary_CP1251.msg"));
|
||||||
|
htmlbodybinary_utf_8 = new MAPIMessage(samples.openResourceAsStream("HTMLBodyBinary_UTF-8.msg"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Evaluate encoding and check if the subject, text body and html body is decoded correctly.
|
||||||
|
*/
|
||||||
|
public void test7BitEncoding() throws Exception {
|
||||||
|
ascii_cp1251_lcid1049.guess7BitEncoding();
|
||||||
|
ascii_cp1251_lcid1049.setReturnNullOnMissingChunk(true);
|
||||||
|
ascii_utf_8_cp1252_lcid1031.guess7BitEncoding();
|
||||||
|
ascii_utf_8_cp1252_lcid1031.setReturnNullOnMissingChunk(true);
|
||||||
|
ascii_utf_8_cp1252_lcid1031_html.guess7BitEncoding();
|
||||||
|
ascii_utf_8_cp1252_lcid1031_html.setReturnNullOnMissingChunk(true);
|
||||||
|
htmlbodybinary_cp1251.guess7BitEncoding();
|
||||||
|
htmlbodybinary_cp1251.setReturnNullOnMissingChunk(true);
|
||||||
|
htmlbodybinary_utf_8.guess7BitEncoding();
|
||||||
|
htmlbodybinary_utf_8.setReturnNullOnMissingChunk(true);
|
||||||
|
|
||||||
|
assertEquals("Subject автоматически Subject", ascii_cp1251_lcid1049.getSubject());
|
||||||
|
assertEquals("Body автоматически Body", ascii_cp1251_lcid1049.getTextBody());
|
||||||
|
assertEquals("<!DOCTYPE html><html><meta charset=\\\"windows-1251\\\"><body>HTML автоматически</body></html>", ascii_cp1251_lcid1049.getHtmlBody());
|
||||||
|
|
||||||
|
assertEquals("Subject öäü Subject", ascii_utf_8_cp1252_lcid1031.getSubject());
|
||||||
|
assertEquals("Body öäü Body", ascii_utf_8_cp1252_lcid1031.getTextBody());
|
||||||
|
assertNull(ascii_utf_8_cp1252_lcid1031.getHtmlBody());
|
||||||
|
|
||||||
|
assertEquals("Subject öäü Subject", ascii_utf_8_cp1252_lcid1031_html.getSubject());
|
||||||
|
assertEquals("Body öäü Body", ascii_utf_8_cp1252_lcid1031_html.getTextBody());
|
||||||
|
assertEquals("<!DOCTYPE html><html><meta charset=\\\"utf-8\\\"><body>HTML öäü</body></html>", ascii_utf_8_cp1252_lcid1031_html.getHtmlBody());
|
||||||
|
|
||||||
|
assertEquals("Subject öäü Subject", htmlbodybinary_cp1251.getSubject());
|
||||||
|
assertNull(htmlbodybinary_cp1251.getTextBody());
|
||||||
|
assertEquals("<!DOCTYPE html><html><meta charset=\\\"utf-8\\\"><body>HTML автоматически</body></html>", htmlbodybinary_cp1251.getHtmlBody());
|
||||||
|
|
||||||
|
assertEquals("Subject öäü Subject", htmlbodybinary_utf_8.getSubject());
|
||||||
|
assertNull(htmlbodybinary_utf_8.getTextBody());
|
||||||
|
assertEquals("<!DOCTYPE html><html><meta charset=\\\"utf-8\\\"><body>HTML öäü</body></html>", htmlbodybinary_utf_8.getHtmlBody());
|
||||||
|
}
|
||||||
|
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue