continue SXSSF docs updates and polishing

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1133782 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2011-06-09 10:38:31 +00:00
parent 21405bf0e3
commit 84de0558db
4 changed files with 95 additions and 34 deletions

View File

@ -626,29 +626,38 @@ public class ExampleEventUserModel {
<anchor id="sxssf"/> <anchor id="sxssf"/>
<section><title>SXSSF (Streaming Usermodel API)</title> <section><title>SXSSF (Streaming Usermodel API)</title>
<p> <p>
XSSF is an API-compatible streaming extension of XSSF to be used when SXSSF (package: org.apache.poi.xssf.streaming) is an API-compatible streaming extension of XSSF to be used when
very large spreadsheets have to be produced, and heap space is limited. very large spreadsheets have to be produced, and heap space is limited.
SXSSF achieves its low memory footprint by limiting access to the rows that SXSSF achieves its low memory footprint by limiting access to the rows that
are within a sliding window, while XSSF gives access to all rows in the are within a sliding window, while XSSF gives access to all rows in the
document. Older rows that are no longer in the window become inaccessible, document. Older rows that are no longer in the window become inaccessible,
as they are written to the disk. as they are written to the disk.
</p> </p>
<p> <p>
You can specify the window size at workbook construction time via <em>new SXSSFWorkbook(int windowSize)</em>
or you can set it per-sheet via <em>SXSSFSheet#setRandomAccessWindowSize(int windowSize)</em>.
</p>
<p>
When a new row is created via createRow() and the total number When a new row is created via createRow() and the total number
of unflushed records would exeed the specified window size, then the of unflushed records would exceed the specified window size, then the
row with the lowest index value is flushed and cannot be accessed row with the lowest index value is flushed and cannot be accessed
via getRow() anymore. via getRow() anymore.
</p> </p>
<p> <p>
A value of -1 indicates unlimited access. In this case all The default window size is <em>100</em> and is defined by SXSSFWorkbook.DEFAULT_WINDOW_SIZE.
records that have not been flushed by a call to flush() are available
for random access.
</p> </p>
<p>
<source><![CDATA[ A windowSize of -1 indicates unlimited access. In this case all
package com.dinom.excel; records that have not been flushed by a call to flushRows() are available
for random access.
</p>
<p> The example below writes a sheet with a window of 100 rows. When the row count reaches 101,
the row with rownum=0 is flushed to disk and removed from memory; when the row count reaches 102, the row with rownum=1 is flushed, etc.
</p>
<source><![CDATA[
import org.apache.poi.ss.SpreadsheetVersion;
import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Sheet;
@ -656,38 +665,68 @@ import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellReference; import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.streaming.SXSSFWorkbook; import org.apache.poi.xssf.streaming.SXSSFWorkbook;
public class Main {
public static void main(String[] args) throws Throwable { public static void main(String[] args) throws Throwable {
Workbook wb = new SXSSFWorkbook(100); // keep 100 rows in memory Workbook wb = new SXSSFWorkbook(100); // keep 100 rows in memory, exceeding rows will be flushed to disk
Sheet sh = wb.createSheet(); Sheet sh = wb.createSheet();
for(int rownum = 0; rownum < 100000; rownum++){ for(int rownum = 0; rownum < 1000; rownum++){
Row row = sh.createRow(rownum); Row row = sh.createRow(rownum);
for(int cellnum = 0; cellnum < 1000; cellnum++){ for(int cellnum = 0; cellnum < 10; cellnum++){
Cell cell = row.createCell(cellnum); Cell cell = row.createCell(cellnum);
String address = new CellReference(cell).formatAsString(); String address = new CellReference(cell).formatAsString();
cell.setCellValue(address); cell.setCellValue(address);
} }
// previous row is within the window and accessible
Row prev = sheet.getRow(rownum - 1);
} for(Row rowInMemory : sh) {
// the row iterator iterates over rows in memory, i.e. over the last 100 rows
System.out.println("Row in memory: " + rowInMemory.getRowNum());
}
}
FileOutputStream out = new FileOutputStream("/temp/sxssf.xlsx");
wb.write(out);
out.close();
}
]]></source>
<p>The next example turns off auto-flushing (windowSize=-1) and the code manually controls how portions of data are written to disk.</p>
<source><![CDATA[
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
public static void main(String[] args) throws Throwable {
Workbook wb = new SXSSFWorkbook(-1); // turn off auto-flushing and accumulate all rows in memory
Sheet sh = wb.createSheet();
for(int rownum = 0; rownum < 1000; rownum++){
Row row = sh.createRow(rownum);
for(int cellnum = 0; cellnum < 10; cellnum++){
Cell cell = row.createCell(cellnum);
String address = new CellReference(cell).formatAsString();
cell.setCellValue(address);
}
// manually control how rows are flushed to disk
if(rownum % 100 == 0) {
((SXSSFSheet)sh).flushRows(100); // retain 100 last rows and flush all others
// ((SXSSFSheet)sh).flushRows() is a shortcut for ((SXSSFSheet)sh).flushRows(0),
// this method flushes all rows
}
// attempt to access flushed rows results in an exception:
try {
Row firstRow = sheet.getRow(0);
} catch (Exception e){
System.out.println("cannot access flushed rows");
} }
FileOutputStream out = new FileOutputStream("/temp/sxssf.xlsx"); FileOutputStream out = new FileOutputStream("/temp/sxssf.xlsx");
wb.write(out); wb.write(out);
out.close(); out.close();
} }
}
]]></source> ]]></source>
</section> </section>

View File

@ -79,10 +79,10 @@
</section> </section>
<section> <section>
<title>SXSSF (SInce POI 3.8 beta3)</title> <title>SXSSF (Since POI 3.8 beta3)</title>
<p>Since 3.8-beta3, POI provides a low-memory footprint SXSSF API built on top of XSSF.</p> <p>Since 3.8-beta3, POI provides a low-memory footprint SXSSF API built on top of XSSF.</p>
<p> <p>
XSSF is an API-compatible streaming extension of XSSF to be used when SXSSF is an API-compatible streaming extension of XSSF to be used when
very large spreadsheets have to be produced, and heap space is limited. very large spreadsheets have to be produced, and heap space is limited.
SXSSF achieves its low memory footprint by limiting access to the rows that SXSSF achieves its low memory footprint by limiting access to the rows that
are within a sliding window, while XSSF gives access to all rows in the are within a sliding window, while XSSF gives access to all rows in the
@ -90,7 +90,10 @@ document. Older rows that are no longer in the window become inaccessible,
as they are written to the disk. as they are written to the disk.
</p> </p>
<p> <p>
In auto-flush mode the size of the access window can be specified, to hold a certain number of rows in memory. When that value is reached, the creationof an additional row causes the row with the lowest index to to be removed from the access window and written to disk.. Or, the window size can be set to grow dynamically; it can be trimmed periodically by an explicit call to flush(int keepRows) as needed. In auto-flush mode the size of the access window can be specified, to hold a certain number of rows in memory.
When that value is reached, the creation of an additional row causes the row with the lowest index to be
removed from the access window and written to disk. Or, the window size can be set to grow dynamically;
it can be trimmed periodically by an explicit call to flushRows(int keepRows) as needed.
</p> </p>
<p> <p>
Due to the streaming nature of the implementation, there are the following Due to the streaming nature of the implementation, there are the following
@ -102,6 +105,8 @@ limitations when compared to XSSF:
<li>Formula evaluation is not supported</li> <li>Formula evaluation is not supported</li>
</ul> </ul>
<p> See more details at <link href="how-to.html#sxssf">SXSSF How-To</link></p>
<p>The table below synopsizes the comparative features of POI's Spreadsheet API:</p> <p>The table below synopsizes the comparative features of POI's Spreadsheet API:</p>
<p><em>Spreadsheet API Feature Summary</em></p> <p><em>Spreadsheet API Feature Summary</em></p>

View File

@ -37,7 +37,7 @@ import org.apache.poi.xssf.usermodel.*;
* 3. Substitute the sheet in the template with the generated data * 3. Substitute the sheet in the template with the generated data
* *
* <p> * <p>
* Since 3.8-beta3 POI provides a low-memory footprint SXSSF API which implementing the "BigGridDemo" strategy. * Since 3.8-beta3 POI provides a low-memory footprint SXSSF API which implements the "BigGridDemo" strategy.
* SXSSF is an API-compatible streaming extension of XSSF to be used when * SXSSF is an API-compatible streaming extension of XSSF to be used when
* very large spreadsheets have to be produced, and heap space is limited. * very large spreadsheets have to be produced, and heap space is limited.
* SXSSF achieves its low memory footprint by limiting access to the rows that * SXSSF achieves its low memory footprint by limiting access to the rows that
@ -45,9 +45,11 @@ import org.apache.poi.xssf.usermodel.*;
* document. Older rows that are no longer in the window become inaccessible, * document. Older rows that are no longer in the window become inaccessible,
* as they are written to the disk. * as they are written to the disk.
* </p> * </p>
* See <a href="http://poi.apache.org/spreadsheet/how-to.html#sxssf">
* http://poi.apache.org/spreadsheet/how-to.html#sxssf</a>.
* *
* @author Yegor Kozlov * @author Yegor Kozlov
* @see <a href="http://poi.apache.org/spreadsheet/how-to.html#sxssf">http://poi.apache.org/spreadsheet/how-to.html#sxssf</a>
*/ */
public class BigGridDemo { public class BigGridDemo {
private static final String XML_ENCODING = "UTF-8"; private static final String XML_ENCODING = "UTF-8";

View File

@ -22,6 +22,7 @@ import java.util.Iterator;
import java.util.TreeMap; import java.util.TreeMap;
import java.util.Map; import java.util.Map;
import org.apache.poi.hpsf.IllegalPropertySetDataException;
import org.apache.poi.ss.usermodel.*; import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.CellReference; import org.apache.poi.ss.util.CellReference;
@ -1179,20 +1180,34 @@ public class SXSSFSheet implements Sheet, Cloneable
* A value of 0 is not allowed because it would flush any newly created row * A value of 0 is not allowed because it would flush any newly created row
* without having a chance to specify any cells. * without having a chance to specify any cells.
*/ */
void setRandomAccessWindowSize(int value) public void setRandomAccessWindowSize(int value)
{ {
assert value!=0; if(value == 0 || value < -1) {
throw new IllegalArgumentException("RandomAccessWindowSize must be either -1 or a positive integer");
}
_randomAccessWindowSize=value; _randomAccessWindowSize=value;
} }
/** /**
* Specifies how many rows can be accessed at most via getRow(). * Specifies how many rows can be accessed at most via getRow().
* The exceeding rows (if any) are flushed to the disk while rows * The exceeding rows (if any) are flushed to the disk while rows
* with lower index values are flushed first. * with lower index values are flushed first.
*/ */
void flushRows(int remaining) throws IOException public void flushRows(int remaining) throws IOException
{ {
while(_rows.size()>remaining) flushOneRow(); while(_rows.size() > remaining) flushOneRow();
} }
/**
* Flush all rows to disk. After this call no rows can be accessed via getRow()
*
* @throws IOException
*/
public void flushRows() throws IOException
{
this.flushRows(0);
}
private void flushOneRow() throws IOException private void flushOneRow() throws IOException
{ {
Map.Entry<Integer,SXSSFRow> firstEntry=_rows.firstEntry(); Map.Entry<Integer,SXSSFRow> firstEntry=_rows.firstEntry();