Correct XWPFRun detection of bold/italic in a paragraph with multiple runs of different styles

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@996976 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-09-14 16:32:02 +00:00
parent 3d0143a196
commit 6b1a10ac97
6 changed files with 167 additions and 10 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.7-beta3" date="2010-??-??">
<action dev="poi-developers" type="fix">Correct XWPFRun detection of bold/italic in a paragraph with multiple runs of different styles</action>
<action dev="poi-developers" type="add">Link XWPFPicture to XWPFRun, so that embedded pictures can be accessed from where they live in the text stream</action>
<action dev="poi-developers" type="fix">Improve handling of Hyperlinks inside XWPFParagraph objects through XWPFHyperlinkRun</action>
<action dev="poi-developers" type="fix">Make XWPFParagraph make more use of XWPFRun, and rely less on internal StringBuffers</action>

View File

@ -1161,7 +1161,7 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody {
* @see org.apache.poi.xwpf.usermodel.IBody#getParagraphArray(int)
*/
public XWPFParagraph getParagraphArray(int pos) {
if(pos > 0 && pos < paragraphs.size()){
if(pos >= 0 && pos < paragraphs.size()){
return paragraphs.get(pos);
}
return null;

View File

@ -119,6 +119,19 @@ public class XWPFRun {
return paragraph;
}
/**
 * Decides whether an on/off toggle element, as used by isBold,
 *  isItalic and isStrike, is switched on.
 * An element that carries no val attribute counts as on; otherwise
 *  the val must be one of the "on" spellings: on, true or 1.
 *
 * @param onoff the toggle element to inspect, must not be null
 * @return true if the toggle is on, false otherwise
 */
private boolean isCTOnOff(CTOnOff onoff) {
    // A bare element with no val attribute means the property is on
    if(! onoff.isSetVal())
        return true;

    // ST_OnOff has three ways of saying "on"; the "1" spelling
    //  was previously not recognised and so was reported as off
    STOnOff.Enum val = onoff.getVal();
    return val == STOnOff.ON
        || val == STOnOff.TRUE
        || val == STOnOff.X_1;
}
/**
* Whether the bold property shall be applied to all non-complex script
* characters in the contents of this run when displayed in a document
@ -127,7 +140,9 @@ public class XWPFRun {
*/
public boolean isBold() {
CTRPr pr = run.getRPr();
// NOTE(review): the one-line return below and the three lines after it look
//  like the before and after sides of one change, shown together because this
//  view carries no +/- markers - confirm against the real file.
// Taken alone, this line answers true whenever the run properties have a
//  bold element set, without looking at that element's value.
return pr != null && pr.isSetB();
// No run properties, or no bold element on them: not bold
if(pr == null || !pr.isSetB())
return false;
// A bold element is present, so isCTOnOff decides from its val
return isCTOnOff(pr.getB());
}
/**
@ -208,7 +223,9 @@ public class XWPFRun {
*/
public boolean isItalic() {
CTRPr pr = run.getRPr();
// NOTE(review): the one-line return below and the three lines after it look
//  like the before and after sides of one change, shown together because this
//  view carries no +/- markers - confirm against the real file.
// Taken alone, this line answers true whenever the run properties have an
//  italic element set, without looking at that element's value.
return pr != null && pr.isSetI();
// No run properties, or no italic element on them: not italic
if(pr == null || !pr.isSetI())
return false;
// An italic element is present, so isCTOnOff decides from its val
return isCTOnOff(pr.getI());
}
/**
@ -284,7 +301,9 @@ public class XWPFRun {
*/
public boolean isStrike() {
CTRPr pr = run.getRPr();
// NOTE(review): the one-line return below and the three lines after it look
//  like the before and after sides of one change, shown together because this
//  view carries no +/- markers - confirm against the real file.
// Taken alone, this line answers true whenever the run properties have a
//  strike element set, without looking at that element's value.
return pr != null && pr.isSetStrike();
// No run properties, or no strike element on them: not struck through
if(pr == null || !pr.isSetStrike())
return false;
// A strike element is present, so isCTOnOff decides from its val
return isCTOnOff(pr.getStrike());
}
/**

View File

@ -98,16 +98,20 @@ public class TestXWPFWordExtractor extends TestCase {
// Now check contents
extractor.setFetchHyperlinks(false);
assertEquals(
"This is a test document\nThis bit is in bold and italic\n" +
"Back to normal\nWe have a hyperlink here, and another.\n",
"This is a test document.\nThis bit is in bold and italic\n" +
"Back to normal\n" +
"This contains BOLD, ITALIC and BOTH, as well as RED and YELLOW text.\n" +
"We have a hyperlink here, and another.\n",
extractor.getText()
);
// One hyperlink is a real one, one is just to the top of page
extractor.setFetchHyperlinks(true);
assertEquals(
"This is a test document\nThis bit is in bold and italic\n" +
"Back to normal\nWe have a hyperlink <http://poi.apache.org/> here, and another.\n",
"This is a test document.\nThis bit is in bold and italic\n" +
"Back to normal\n" +
"This contains BOLD, ITALIC and BOTH, as well as RED and YELLOW text.\n" +
"We have a hyperlink <http://poi.apache.org/> here, and another.\n",
extractor.getText()
);
}

View File

@ -20,6 +20,7 @@ import java.math.BigInteger;
import junit.framework.TestCase;
import org.apache.poi.xwpf.XWPFTestDataSamples;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
@ -191,6 +192,138 @@ public class TestXWPFRun extends TestCase {
assertEquals(2, run.getCTR().sizeOfBrArray());
}
/**
 * Opens an existing sample document and checks that every run
 *  reports the expected text along with the expected bold,
 *  italic and strike flags
 */
public void testExisting() {
    XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("TestDocument.docx");
    XWPFParagraph para;
    XWPFRun styled;

    // Paragraph 0 - plain text, split over two unstyled runs
    para = doc.getParagraphArray(0);
    assertEquals("This is a test document.", para.getText());
    assertEquals(2, para.getRuns().size());
    assertUnstyledRun(para.getRuns().get(0), "This is a test document");
    assertUnstyledRun(para.getRuns().get(1), ".");

    // Paragraph 1 - a single run, bold and italic throughout
    para = doc.getParagraphArray(1);
    assertEquals("This bit is in bold and italic", para.getText());
    assertEquals(1, para.getRuns().size());
    styled = para.getRuns().get(0);
    assertRunStyle(styled, "This bit is in bold and italic", true, true);
    // The bold element is there, but has no explicit val on it
    assertEquals(true, styled.getCTR().getRPr().isSetB());
    assertEquals(false, styled.getCTR().getRPr().getB().isSetVal());

    // Paragraph 2 - a single unstyled run again
    para = doc.getParagraphArray(2);
    assertEquals("Back to normal", para.getText());
    assertEquals(1, para.getRuns().size());
    assertUnstyledRun(para.getRuns().get(0), "Back to normal");

    // Paragraph 3 - lots of differently styled runs in one paragraph
    para = doc.getParagraphArray(3);
    assertEquals("This contains BOLD, ITALIC and BOTH, as well as RED and YELLOW text.", para.getText());
    assertEquals(11, para.getRuns().size());
    assertUnstyledRun(para.getRuns().get(0), "This contains ");
    assertRunStyle(para.getRuns().get(1), "BOLD", true, false);
    assertUnstyledRun(para.getRuns().get(2), ", ");
    assertRunStyle(para.getRuns().get(3), "ITALIC", false, true);
    assertUnstyledRun(para.getRuns().get(4), " and ");
    assertRunStyle(para.getRuns().get(5), "BOTH", true, true);
    assertUnstyledRun(para.getRuns().get(6), ", as well as ");
    // The coloured runs are neither bold nor italic
    assertRunStyle(para.getRuns().get(7), "RED", false, false);
    assertUnstyledRun(para.getRuns().get(8), " and ");
    assertRunStyle(para.getRuns().get(9), "YELLOW", false, false);
    assertUnstyledRun(para.getRuns().get(10), " text.");
}

/**
 * Asserts the text and the bold / italic flags of a run, and
 *  that the run is not struck through
 */
private void assertRunStyle(XWPFRun run, String text, boolean bold, boolean italic) {
    assertEquals(text, run.toString());
    assertEquals(bold, run.isBold());
    assertEquals(italic, run.isItalic());
    assertEquals(false, run.isStrike());
}

/**
 * Asserts that a run has the given text, is not bold, italic or
 *  struck through, and has no run properties at all
 */
private void assertUnstyledRun(XWPFRun run, String text) {
    assertRunStyle(run, text, false, false);
    assertEquals(null, run.getCTR().getRPr());
}
}

Binary file not shown.