On our previous post we know that Apache POI have 2 types of model
- User Model
- Event Model
Now we try to learn how to read .xls and .xlsx file using Event Model.
Q.Why Event Model from Apache POI ?
Ans: Though User Model is easy to use from coding part and mostly used from everywhere, but it is not efficient when Excel file is very large maximum time user got error like "Out Of Memory Exception", for this reason we need to know how to manage large excel file using Apache POI Event Model.
Here we need to add this .jar with our projects
1. poi-3.10-FINAL-20140208.jar if we try to read only .xls file then it is enough but if we try to read .xlsx file then we need to add this .jar as given below.
2.poi-ooxml-3.10-FINAL-20140208.jar
3.poi-ooxml-schemas-3.10-FINAL-20140208.jar
4.xmlbeans-2.3.0.jar
5.dom4j-1.6.1.jar
Code For Reading .xls File
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.RowRecord;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class EventModelXls
implements HSSFListener {
private SSTRecord sstrec;
//Give the SheetName which we want to read
String mySheetName="Sheet2";
ArrayList allSheetName =new ArrayList();
int j=0;
/**
* This method listens for incoming records and handles them as required.
*
* @param record The record that was found while reading.
*/
@Override
public void processRecord(Record record) {
switch (record.getSid()) {
// the BOFRecord can represent either the beginning of a sheet or the workbook
case BOFRecord.sid:
BOFRecord bof = (BOFRecord) record;
if (bof.getType() == bof.TYPE_WORKBOOK) {
System.out.println("Encountered workbook");
// assigned to the class level member
} else if (bof.getType() == bof.TYPE_WORKSHEET) {
System.out.println("Encountered sheet reference" +"SheetName ="+allSheetName.get(j));
j++;
}
break;
case BoundSheetRecord.sid:
BoundSheetRecord bsr = (BoundSheetRecord) record;
System.out.println("New sheet named: " + bsr.getSheetname());
allSheetName.add(bsr.getSheetname());
break;
case RowRecord.sid:
RowRecord rowrec = (RowRecord) record;
System.out.println("Row found, first column at "
+ rowrec.getFirstCol() + " last column at " + rowrec.getLastCol());
break;
case NumberRecord.sid:
if(mySheetName.equalsIgnoreCase(allSheetName.get(j-1).toString())){
NumberRecord numrec = (NumberRecord) record;
System.out.println("Cell found with value" + numrec.getValue()
+ " at row " + numrec.getRow() + " and column " + numrec.getColumn());
}
break;
// SSTRecords store a array of unique strings used in Excel.
case SSTRecord.sid:
sstrec = (SSTRecord) record;
for (int k = 0; k < sstrec.getNumUniqueStrings(); k++) {
System.out.println("String table value " + k + " = " + sstrec.getString(k) +sstrec.getNumStrings());
}
break;
case LabelSSTRecord.sid:
if(mySheetName.equalsIgnoreCase(allSheetName.get(j-1).toString())){
LabelSSTRecord lrec = (LabelSSTRecord) record;
System.out.println("String cell found with value "
+ sstrec.getString(lrec.getSSTIndex())+ " at row " + lrec.getRow() + " and column " + lrec.getColumn());
}break;
}
}
/**
* Read an excel file and spit out what we find.
*
* @param args Expect one argument that is the file to read.
* @throws IOException When there is an error processing the file.
*/
public static void main(String[] args) throws IOException {
// create a new file input stream with the input file specified
// at the command line
FileInputStream fin = new FileInputStream(new File("resources/Test1.xls"));
// create a new org.apache.poi.poifs.filesystem.Filesystem
POIFSFileSystem poifs = new POIFSFileSystem(fin);
// get the Workbook (excel part) stream in a InputStream
InputStream din = poifs.createDocumentInputStream("Workbook");
// construct out HSSFRequest object
HSSFRequest req = new HSSFRequest();
// lazy listen for ALL records with the listener shown above
req.addListenerForAllRecords(new EventModelXls());
// create our event factory
HSSFEventFactory factory = new HSSFEventFactory();
// process our events based on the document input stream
factory.processEvents(req, din);
// once all the events are processed close our file input stream
fin.close();
// and our document input stream (don't want to leak these!)
din.close();
System.out.println("done.");
}
}
Code For Reading .xlsx File
import java.io.InputStream;
import java.util.Iterator;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
public class XlsxEventModel {
public void processOneSheet(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
/********************************
*rId2 found by processing the Workbook
* Seems to either be rId# or rSheet#
*/
InputStream sheet2 = r.getSheet("rId2");
InputSource sheetSource = new InputSource(sheet2);
parser.parse(sheetSource);
sheet2.close();
}
public void processAllSheets(String filename) throws Exception {
OPCPackage pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
XMLReader parser = fetchSheetParser(sst);
XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) r.getSheetsData();
// Iterator<InputStream> sheets = r.getSheetsData();
while (sheets.hasNext()) {
System.out.println("Processing new sheet:\n");
InputStream sheet = sheets.next();
System.out.println("SheetName" + sheets.getSheetName());
InputSource sheetSource = new InputSource(sheet);
parser.parse(sheetSource);
sheet.close();
System.out.println("");
}
}
public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
/****************************************
* This Part is commented out because This is required
* SAX 2(Apache SAX perser) parser and need xerces-2.8.0.jar
* If xerces-2.8.0.jar is available then commented out
* another SAX(JRE default) parser part and uncomment this.
*
*
* XMLReader parser =
* XMLReaderFactory.createXMLReader(
* "org.apache.xerces.parsers.SAXParser");
*
*/
XMLReader parser =
XMLReaderFactory.createXMLReader(
"com.sun.org.apache.xerces.internal.parsers.SAXParser");
ContentHandler handler = new SheetHandler(sst);
parser.setContentHandler(handler);
return parser;
}
/**
* See org.xml.sax.helpers.DefaultHandler javadocs
*/
private static class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private SheetHandler(SharedStringsTable sst) {
this.sst = sst;
}
@Override
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
// c => cell
if (name.equals("c")) {
// Print the cell reference
System.out.print(attributes.getValue("r") + " - ");
// Figure out if the value is an index in the SST
String cellType = attributes.getValue("t");
if (cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// Clear contents cache
lastContents = "";
}
@Override
public void endElement(String uri, String localName, String name)
throws SAXException {
// Process the last contents as required.
// Do now, as characters() may be called more than once
if (nextIsString) {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
nextIsString = false;
}
// v => contents of a cell
// Output after we've seen the string contents
if (name.equals("v")) {
System.out.println(lastContents);
}
}
@Override
public void characters(char[] ch, int start, int length)
throws SAXException {
lastContents += new String(ch, start, length);
}
}
public static void main(String[] args) throws Exception {
XlsxEventModel howto = new XlsxEventModel();
howto.processAllSheets(("resources/Test1.xlsx"));
}
}
This comment has been removed by the author.
ReplyDeleteWhether we can be able to read both xls and xlsx in same code base using event or event-user model???
ReplyDeleteAs I know you cann't use the same codebase for xlsx and xls because underlying structure of this 2 type of excel is totally different.
DeleteI need to process only first sheet of an XLS file for my application, Currently checking condition in processRecord method to avoid processing of the remaining sheets similar to first example. Is there anyway to stop / remove event Lister after first sheet so that current program can be more efficient.
ReplyDelete