mirror of https://github.com/apache/poi.git
fix spelling of OutlookTextExtractor class name
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1871986 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7493473a8e
commit
25a82d8c55
|
@ -16,174 +16,46 @@
|
|||
==================================================================== */
|
||||
package org.apache.poi.hsmf.extractor;
|
||||
|
||||
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
|
||||
import org.apache.poi.hsmf.MAPIMessage;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.Removal;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.poi.extractor.POIOLE2TextExtractor;
|
||||
import org.apache.poi.hsmf.MAPIMessage;
|
||||
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
|
||||
import org.apache.poi.hsmf.datatypes.StringChunk;
|
||||
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.LocaleUtil;
|
||||
import org.apache.poi.util.StringUtil.StringsIterator;
|
||||
|
||||
/**
|
||||
* A text extractor for HSMF (Outlook) .msg files.
|
||||
* Outputs in a format somewhat like a plain text email.
|
||||
*
|
||||
* @deprecated use {@link OutlookTextExtractor} instead
|
||||
*/
|
||||
public class OutlookTextExtactor extends POIOLE2TextExtractor {
|
||||
public OutlookTextExtactor(MAPIMessage msg) {
|
||||
super(msg);
|
||||
}
|
||||
public OutlookTextExtactor(DirectoryNode poifsDir) throws IOException {
|
||||
this(new MAPIMessage(poifsDir));
|
||||
}
|
||||
public OutlookTextExtactor(POIFSFileSystem fs) throws IOException {
|
||||
this(new MAPIMessage(fs));
|
||||
}
|
||||
public OutlookTextExtactor(InputStream inp) throws IOException {
|
||||
this(new MAPIMessage(inp));
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
for(String filename : args) {
|
||||
try (POIFSFileSystem poifs = new POIFSFileSystem(new File(filename));
|
||||
OutlookTextExtactor extractor = new OutlookTextExtactor(poifs)) {
|
||||
System.out.println(extractor.getText());
|
||||
}
|
||||
}
|
||||
}
|
||||
@Deprecated
|
||||
@Removal(version = "5.0.0")
|
||||
public class OutlookTextExtactor extends OutlookTextExtractor {
|
||||
public OutlookTextExtactor(MAPIMessage msg) {
|
||||
super(msg);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the underlying MAPI message
|
||||
*/
|
||||
public MAPIMessage getMAPIMessage() {
|
||||
return (MAPIMessage)document;
|
||||
}
|
||||
|
||||
/**
|
||||
* Outputs something a little like an RFC822 email
|
||||
*/
|
||||
public String getText() {
|
||||
MAPIMessage msg = (MAPIMessage)document;
|
||||
StringBuilder s = new StringBuilder();
|
||||
|
||||
// See if we can get a suitable encoding for any
|
||||
// non unicode text in the file
|
||||
msg.guess7BitEncoding();
|
||||
|
||||
// Off we go
|
||||
StringsIterator emails;
|
||||
try {
|
||||
emails = new StringsIterator(
|
||||
msg.getRecipientEmailAddressList()
|
||||
);
|
||||
} catch(ChunkNotFoundException e) {
|
||||
emails = new StringsIterator(new String[0]);
|
||||
}
|
||||
|
||||
try {
|
||||
s.append("From: " + msg.getDisplayFrom() + "\n");
|
||||
} catch(ChunkNotFoundException e) {}
|
||||
|
||||
// For To, CC and BCC, try to match the names
|
||||
// up with their email addresses. Relies on the
|
||||
// Recipient Chunks being in the same order as
|
||||
// people in To + CC + BCC.
|
||||
try {
|
||||
handleEmails(s, "To", msg.getDisplayTo(), emails);
|
||||
} catch(ChunkNotFoundException e) {}
|
||||
try {
|
||||
handleEmails(s, "CC", msg.getDisplayCC(), emails);
|
||||
} catch(ChunkNotFoundException e) {}
|
||||
try {
|
||||
handleEmails(s, "BCC", msg.getDisplayBCC(), emails);
|
||||
} catch(ChunkNotFoundException e) {}
|
||||
|
||||
// Date - try two ways to find it
|
||||
try {
|
||||
// First try via the proper chunk
|
||||
SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss Z", Locale.ROOT);
|
||||
f.setTimeZone(LocaleUtil.getUserTimeZone());
|
||||
s.append("Date: ").append(f.format(msg.getMessageDate().getTime())).append("\n");
|
||||
} catch(ChunkNotFoundException e) {
|
||||
try {
|
||||
// Failing that try via the raw headers
|
||||
String[] headers = msg.getHeaders();
|
||||
for(String header: headers) {
|
||||
if(startsWithIgnoreCase(header, "date:")) {
|
||||
s.append("Date:").append(header, header.indexOf(':')+1, header.length()).append("\n");
|
||||
break;
|
||||
}
|
||||
public OutlookTextExtactor(DirectoryNode poifsDir) throws IOException {
|
||||
super(new MAPIMessage(poifsDir));
|
||||
}
|
||||
|
||||
public OutlookTextExtactor(POIFSFileSystem fs) throws IOException {
|
||||
super(new MAPIMessage(fs));
|
||||
}
|
||||
|
||||
public OutlookTextExtactor(InputStream inp) throws IOException {
|
||||
super(new MAPIMessage(inp));
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
for (String filename : args) {
|
||||
try (POIFSFileSystem poifs = new POIFSFileSystem(new File(filename));
|
||||
OutlookTextExtractor extractor = new OutlookTextExtractor(poifs)) {
|
||||
System.out.println(extractor.getText());
|
||||
}
|
||||
} catch(ChunkNotFoundException he) {
|
||||
// We can't find the date, sorry...
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
s.append("Subject: ").append(msg.getSubject()).append("\n");
|
||||
} catch(ChunkNotFoundException e) {}
|
||||
|
||||
// Display attachment names
|
||||
// To get the attachments, use ExtractorFactory
|
||||
for(AttachmentChunks att : msg.getAttachmentFiles()) {
|
||||
StringChunk name = att.getAttachLongFileName();
|
||||
if (name == null) name = att.getAttachFileName();
|
||||
String attName = name == null ? null : name.getValue();
|
||||
|
||||
if(att.getAttachMimeTag() != null &&
|
||||
att.getAttachMimeTag().getValue() != null) {
|
||||
attName = att.getAttachMimeTag().getValue() + " = " + attName;
|
||||
}
|
||||
s.append("Attachment: ").append(attName).append("\n");
|
||||
}
|
||||
|
||||
try {
|
||||
s.append("\n").append(msg.getTextBody()).append("\n");
|
||||
} catch(ChunkNotFoundException e) {}
|
||||
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a Display focused string, e.g. "Nick; Jim", and an iterator
|
||||
* of emails, and does its best to return something like
|
||||
* "Nick <nick@example.com>; Jim <jim@example.com>"
|
||||
*/
|
||||
protected void handleEmails(StringBuilder s, String type, String displayText, StringsIterator emails) {
|
||||
if(displayText == null || displayText.length() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
String[] names = displayText.split(";\\s*");
|
||||
boolean first = true;
|
||||
|
||||
s.append(type).append(": ");
|
||||
for(String name : names) {
|
||||
if(first) {
|
||||
first = false;
|
||||
} else {
|
||||
s.append("; ");
|
||||
}
|
||||
|
||||
s.append(name);
|
||||
if(emails.hasNext()) {
|
||||
String email = emails.next();
|
||||
// Append the email address in <>, assuming
|
||||
// the name wasn't already the email address
|
||||
if(! email.equals(name)) {
|
||||
s.append(" <").append(email).append(">");
|
||||
}
|
||||
}
|
||||
}
|
||||
s.append("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,201 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hsmf.extractor;
|
||||
|
||||
import org.apache.poi.extractor.POIOLE2TextExtractor;
|
||||
import org.apache.poi.hsmf.MAPIMessage;
|
||||
import org.apache.poi.hsmf.datatypes.AttachmentChunks;
|
||||
import org.apache.poi.hsmf.datatypes.StringChunk;
|
||||
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.LocaleUtil;
|
||||
import org.apache.poi.util.Removal;
|
||||
import org.apache.poi.util.StringUtil.StringsIterator;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Locale;
|
||||
|
||||
import static org.apache.poi.util.StringUtil.startsWithIgnoreCase;
|
||||
|
||||
/**
|
||||
* A text extractor for HSMF (Outlook) .msg files.
|
||||
* Outputs in a format somewhat like a plain text email.
|
||||
*
|
||||
* @since 4.1.2
|
||||
*/
|
||||
public class OutlookTextExtractor extends POIOLE2TextExtractor {
|
||||
public OutlookTextExtractor(MAPIMessage msg) {
|
||||
super(msg);
|
||||
}
|
||||
|
||||
public OutlookTextExtractor(DirectoryNode poifsDir) throws IOException {
|
||||
this(new MAPIMessage(poifsDir));
|
||||
}
|
||||
|
||||
public OutlookTextExtractor(POIFSFileSystem fs) throws IOException {
|
||||
this(new MAPIMessage(fs));
|
||||
}
|
||||
|
||||
public OutlookTextExtractor(InputStream inp) throws IOException {
|
||||
this(new MAPIMessage(inp));
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
for (String filename : args) {
|
||||
try (POIFSFileSystem poifs = new POIFSFileSystem(new File(filename));
|
||||
OutlookTextExtractor extractor = new OutlookTextExtractor(poifs)) {
|
||||
System.out.println(extractor.getText());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the underlying MAPI message
|
||||
*/
|
||||
public MAPIMessage getMAPIMessage() {
|
||||
return (MAPIMessage) document;
|
||||
}
|
||||
|
||||
/**
|
||||
* Outputs something a little like a RFC822 email
|
||||
*/
|
||||
public String getText() {
|
||||
MAPIMessage msg = (MAPIMessage) document;
|
||||
StringBuilder s = new StringBuilder();
|
||||
|
||||
// See if we can get a suitable encoding for any
|
||||
// non unicode text in the file
|
||||
msg.guess7BitEncoding();
|
||||
|
||||
// Off we go
|
||||
StringsIterator emails;
|
||||
try {
|
||||
emails = new StringsIterator(
|
||||
msg.getRecipientEmailAddressList()
|
||||
);
|
||||
} catch (ChunkNotFoundException e) {
|
||||
emails = new StringsIterator(new String[0]);
|
||||
}
|
||||
|
||||
try {
|
||||
s.append("From: " + msg.getDisplayFrom() + "\n");
|
||||
} catch (ChunkNotFoundException e) {
|
||||
}
|
||||
|
||||
// For To, CC and BCC, try to match the names
|
||||
// up with their email addresses. Relies on the
|
||||
// Recipient Chunks being in the same order as
|
||||
// people in To + CC + BCC.
|
||||
try {
|
||||
handleEmails(s, "To", msg.getDisplayTo(), emails);
|
||||
} catch (ChunkNotFoundException e) {
|
||||
}
|
||||
try {
|
||||
handleEmails(s, "CC", msg.getDisplayCC(), emails);
|
||||
} catch (ChunkNotFoundException e) {
|
||||
}
|
||||
try {
|
||||
handleEmails(s, "BCC", msg.getDisplayBCC(), emails);
|
||||
} catch (ChunkNotFoundException e) {
|
||||
}
|
||||
|
||||
// Date - try two ways to find it
|
||||
try {
|
||||
// First try via the proper chunk
|
||||
SimpleDateFormat f = new SimpleDateFormat("E, d MMM yyyy HH:mm:ss Z", Locale.ROOT);
|
||||
f.setTimeZone(LocaleUtil.getUserTimeZone());
|
||||
s.append("Date: ").append(f.format(msg.getMessageDate().getTime())).append("\n");
|
||||
} catch (ChunkNotFoundException e) {
|
||||
try {
|
||||
// Failing that try via the raw headers
|
||||
String[] headers = msg.getHeaders();
|
||||
for (String header : headers) {
|
||||
if (startsWithIgnoreCase(header, "date:")) {
|
||||
s.append("Date:").append(header, header.indexOf(':') + 1, header.length()).append("\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (ChunkNotFoundException he) {
|
||||
// We can't find the date, sorry...
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
s.append("Subject: ").append(msg.getSubject()).append("\n");
|
||||
} catch (ChunkNotFoundException e) {
|
||||
}
|
||||
|
||||
// Display attachment names
|
||||
// To get the attachments, use ExtractorFactory
|
||||
for (AttachmentChunks att : msg.getAttachmentFiles()) {
|
||||
StringChunk name = att.getAttachLongFileName();
|
||||
if (name == null) name = att.getAttachFileName();
|
||||
String attName = name == null ? null : name.getValue();
|
||||
|
||||
if (att.getAttachMimeTag() != null &&
|
||||
att.getAttachMimeTag().getValue() != null) {
|
||||
attName = att.getAttachMimeTag().getValue() + " = " + attName;
|
||||
}
|
||||
s.append("Attachment: ").append(attName).append("\n");
|
||||
}
|
||||
|
||||
try {
|
||||
s.append("\n").append(msg.getTextBody()).append("\n");
|
||||
} catch (ChunkNotFoundException e) {
|
||||
}
|
||||
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a Display focused string, eg "Nick; Jim" and an iterator
|
||||
* of emails, and does its best to return something like
|
||||
* "Nick <nick@example.com>; Jim <jim@example.com>"
|
||||
*/
|
||||
protected void handleEmails(StringBuilder s, String type, String displayText, StringsIterator emails) {
|
||||
if (displayText == null || displayText.length() == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
String[] names = displayText.split(";\\s*");
|
||||
boolean first = true;
|
||||
|
||||
s.append(type).append(": ");
|
||||
for (String name : names) {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
s.append("; ");
|
||||
}
|
||||
|
||||
s.append(name);
|
||||
if (emails.hasNext()) {
|
||||
String email = emails.next();
|
||||
// Append the email address in <>, assuming
|
||||
// the name wasn't already the email address
|
||||
if (!email.equals(name)) {
|
||||
s.append(" <").append(email).append(">");
|
||||
}
|
||||
}
|
||||
}
|
||||
s.append("\n");
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue