mirror of https://github.com/apache/poi.git
Include a test for the text extraction code. Update code to use POILogger instead of System.err. Update the NOTICE file for the TLP change, and add entries for JUnit and the small GPL v3 bits of HDGF
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@551273 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
97fb171369
commit
211c9e157a
13
legal/NOTICE
13
legal/NOTICE
|
@ -1,5 +1,16 @@
|
|||
Apache Jakarta POI
|
||||
Apache POI
|
||||
Copyright 2001-2007 The Apache Software Foundation
|
||||
|
||||
This product includes software developed by
|
||||
The Apache Software Foundation (http://www.apache.org/).
|
||||
|
||||
|
||||
Unit testing support is provided by JUnit, under the
|
||||
Common Public License Version 1.0:
|
||||
http://www.opensource.org/licenses/cpl.php
|
||||
See http://www.junit.org/
|
||||
|
||||
Small parts of the POI component HDGF are based on VSDump,
|
||||
and are under the GNU General Public License version 3 (GPL v3):
|
||||
http://gplv3.fsf.org/
|
||||
See http://www.gnome.ru/projects/vsdump_en.html
|
||||
|
|
|
@ -20,6 +20,8 @@ import java.util.ArrayList;
|
|||
|
||||
import org.apache.poi.hdgf.chunks.ChunkFactory.CommandDefinition;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
import org.apache.poi.util.StringUtil;
|
||||
|
||||
/**
|
||||
|
@ -45,6 +47,9 @@ public class Chunk {
|
|||
/** The name of the chunk, as found from the commandDefinitions */
|
||||
private String name;
|
||||
|
||||
/** For logging warnings about the structure of the file */
|
||||
private POILogger logger = POILogFactory.getLogger(Chunk.class);
|
||||
|
||||
public Chunk(ChunkHeader header, ChunkTrailer trailer, ChunkSeparator separator, byte[] contents) {
|
||||
this.header = header;
|
||||
this.trailer = trailer;
|
||||
|
@ -149,7 +154,9 @@ public class Chunk {
|
|||
|
||||
// Check we seem to have enough data
|
||||
if(offset >= contents.length) {
|
||||
System.err.println("Command offset " + offset + " past end of data at " + contents.length);
|
||||
logger.log(POILogger.WARN,
|
||||
"Command offset " + offset + " past end of data at " + contents.length
|
||||
);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -207,7 +214,8 @@ public class Chunk {
|
|||
break;
|
||||
|
||||
default:
|
||||
//System.err.println("Warning - Command of type " + type + " not processed!");
|
||||
logger.log(POILogger.INFO,
|
||||
"Command of type " + type + " not processed!");
|
||||
}
|
||||
|
||||
// Add to the array
|
||||
|
|
|
@ -24,6 +24,9 @@ import java.util.ArrayList;
|
|||
import java.util.Hashtable;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
/**
|
||||
* Factory class to create the appropriate chunks, which
|
||||
* needs the version of the file to process the chunk header
|
||||
|
@ -42,6 +45,9 @@ public class ChunkFactory {
|
|||
private static String chunkTableName =
|
||||
"/org/apache/poi/hdgf/chunks/chunks_parse_cmds.tbl";
|
||||
|
||||
/** For logging problems we spot with the file */
|
||||
private POILogger logger = POILogFactory.getLogger(ChunkFactory.class);
|
||||
|
||||
public ChunkFactory(int version) throws IOException {
|
||||
this.version = version;
|
||||
|
||||
|
@ -107,7 +113,8 @@ public class ChunkFactory {
|
|||
// Check we have enough data, and tweak the header size
|
||||
// as required
|
||||
if(endOfDataPos > data.length) {
|
||||
System.err.println("Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!");
|
||||
logger.log(POILogger.WARN,
|
||||
"Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!");
|
||||
|
||||
endOfDataPos = data.length;
|
||||
header.length = data.length - offset - header.getSizeInBytes();
|
||||
|
|
|
@ -57,7 +57,6 @@ public class VisioTextExtractor {
|
|||
for(int i=0; i<hdgf.getTopLevelStreams().length; i++) {
|
||||
findText(hdgf.getTopLevelStreams()[i], text);
|
||||
}
|
||||
System.err.println("Found " + text.size() + " text string");
|
||||
return (String[])text.toArray( new String[text.size()] );
|
||||
}
|
||||
private void findText(Stream stream, ArrayList text) {
|
||||
|
@ -108,6 +107,8 @@ public class VisioTextExtractor {
|
|||
|
||||
VisioTextExtractor extractor =
|
||||
new VisioTextExtractor(new FileInputStream(args[0]));
|
||||
System.out.println(extractor.getText());
|
||||
|
||||
// Print not PrintLn as already has \n added to it
|
||||
System.out.print(extractor.getText());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hdgf.extractor;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.PrintStream;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.hdgf.HDGFDiagram;
|
||||
import org.apache.poi.hdgf.chunks.Chunk;
|
||||
import org.apache.poi.hdgf.chunks.ChunkFactory;
|
||||
import org.apache.poi.hdgf.pointers.Pointer;
|
||||
import org.apache.poi.hdgf.pointers.PointerFactory;
|
||||
import org.apache.poi.hssf.record.formula.eval.StringOperationEval;
|
||||
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
|
||||
public class TestVisioExtractor extends TestCase {
|
||||
private String filename;
|
||||
protected void setUp() throws Exception {
|
||||
String dirname = System.getProperty("HDGF.testdata.path");
|
||||
filename = dirname + "/Test_Visio-Some_Random_Text.vsd";
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the 3 different ways of creating one
|
||||
*/
|
||||
public void testCreation() throws Exception {
|
||||
VisioTextExtractor extractor;
|
||||
|
||||
extractor = new VisioTextExtractor(new FileInputStream(filename));
|
||||
assertNotNull(extractor);
|
||||
assertNotNull(extractor.getAllText());
|
||||
assertEquals(3, extractor.getAllText().length);
|
||||
|
||||
extractor = new VisioTextExtractor(
|
||||
new POIFSFileSystem(
|
||||
new FileInputStream(filename)
|
||||
)
|
||||
);
|
||||
assertNotNull(extractor);
|
||||
assertNotNull(extractor.getAllText());
|
||||
assertEquals(3, extractor.getAllText().length);
|
||||
|
||||
extractor = new VisioTextExtractor(
|
||||
new HDGFDiagram(
|
||||
new POIFSFileSystem(
|
||||
new FileInputStream(filename)
|
||||
)
|
||||
)
|
||||
);
|
||||
assertNotNull(extractor);
|
||||
assertNotNull(extractor.getAllText());
|
||||
assertEquals(3, extractor.getAllText().length);
|
||||
}
|
||||
|
||||
public void testExtraction() throws Exception {
|
||||
VisioTextExtractor extractor =
|
||||
new VisioTextExtractor(new FileInputStream(filename));
|
||||
|
||||
// Check the array fetch
|
||||
String[] text = extractor.getAllText();
|
||||
assertNotNull(text);
|
||||
assertEquals(3, text.length);
|
||||
|
||||
assertEquals("Test View\n", text[0]);
|
||||
assertEquals("I am a test view\n", text[1]);
|
||||
assertEquals("Some random text, on a page\n", text[2]);
|
||||
|
||||
// And the all-in fetch
|
||||
String textS = extractor.getText();
|
||||
assertEquals("Test View\nI am a test view\nSome random text, on a page\n", textS);
|
||||
}
|
||||
|
||||
public void testMain() throws Exception {
|
||||
PrintStream oldOut = System.out;
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
PrintStream capture = new PrintStream(baos);
|
||||
System.setOut(capture);
|
||||
|
||||
VisioTextExtractor.main(new String[] {filename});
|
||||
|
||||
// Put things back
|
||||
System.setOut(oldOut);
|
||||
|
||||
// Check
|
||||
capture.flush();
|
||||
String text = baos.toString();
|
||||
assertEquals("Test View\nI am a test view\nSome random text, on a page\n", text);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue