Thursday, March 6, 2014

Read Excel With Apache POI Event Model


On our previous post we know that Apache POI have 2 types of model
  • User Model
  • Event Model
On previous post we learn how to read Excel (.xls and .xlsx file) using Apache POI with User Model.
Now we try to learn how to read .xls and .xlsx file using Event Model.

Q.Why Event Model from Apache POI ?
Ans: Though User Model is easy to use from coding part and mostly used from everywhere, but it is not efficient when Excel file is very large maximum time user got error like "Out Of Memory Exception", for this reason we need to know how to manage large excel file using Apache POI Event Model.


Here we need to add this .jar with our projects
                 
1. poi-3.10-FINAL-20140208.jar if we try to read only .xls file then it is enough but if we try to read .xlsx file then we need to add this .jar as given below.
2.poi-ooxml-3.10-FINAL-20140208.jar
3.poi-ooxml-schemas-3.10-FINAL-20140208.jar
4.xmlbeans-2.3.0.jar
5.dom4j-1.6.1.jar

Code For Reading .xls File 



import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import org.apache.poi.hssf.eventusermodel.HSSFEventFactory;
import org.apache.poi.hssf.eventusermodel.HSSFListener;
import org.apache.poi.hssf.eventusermodel.HSSFRequest;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BoundSheetRecord;
import org.apache.poi.hssf.record.LabelSSTRecord;
import org.apache.poi.hssf.record.NumberRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.RowRecord;
import org.apache.poi.hssf.record.SSTRecord;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;

public class EventModelXls
        implements HSSFListener {

    private SSTRecord sstrec;

   //Give the SheetName which we want to read
    String mySheetName="Sheet2";
    ArrayList allSheetName =new ArrayList();
    int j=0;

    /**
     * This method listens for incoming records and handles them as required.
     *
     * @param record The record that was found while reading.
     */
    @Override
    public void processRecord(Record record) {

        switch (record.getSid()) {
            // the BOFRecord can represent either the beginning of a sheet or the workbook
            case BOFRecord.sid:
                BOFRecord bof = (BOFRecord) record;
                if (bof.getType() == bof.TYPE_WORKBOOK) {
                    System.out.println("Encountered workbook");
                    // assigned to the class level member
                } else if (bof.getType() == bof.TYPE_WORKSHEET) {
                    System.out.println("Encountered sheet reference" +"SheetName ="+allSheetName.get(j));
                    j++;
                }
                break;
            case BoundSheetRecord.sid:
                BoundSheetRecord bsr = (BoundSheetRecord) record;
                System.out.println("New sheet named: " + bsr.getSheetname());
                allSheetName.add(bsr.getSheetname());
                break;
            case RowRecord.sid:
                RowRecord rowrec = (RowRecord) record;
                System.out.println("Row found, first column at "
                        + rowrec.getFirstCol() + " last column at " + rowrec.getLastCol());
                break;
            case NumberRecord.sid:
               
             if(mySheetName.equalsIgnoreCase(allSheetName.get(j-1).toString())){
                NumberRecord numrec = (NumberRecord) record;
                System.out.println("Cell found with value" + numrec.getValue()
                        + " at row " + numrec.getRow() + " and column " + numrec.getColumn());
             }
                break;
            // SSTRecords store a array of unique strings used in Excel.
            case SSTRecord.sid:
                sstrec = (SSTRecord) record;
                for (int k = 0; k < sstrec.getNumUniqueStrings(); k++) {
                    System.out.println("String table value " + k + " = " + sstrec.getString(k) +sstrec.getNumStrings());
                }
                break;
            case LabelSSTRecord.sid:
               
                if(mySheetName.equalsIgnoreCase(allSheetName.get(j-1).toString())){
                LabelSSTRecord lrec = (LabelSSTRecord) record;
                System.out.println("String cell found with value "
                        + sstrec.getString(lrec.getSSTIndex())+ " at row " + lrec.getRow() + " and column " + lrec.getColumn());
               
                }break;
        }
    }



    /**
     * Read an excel file and spit out what we find.
     *
     * @param args Expect one argument that is the file to read.
     * @throws IOException When there is an error processing the file.
     */
    public static void main(String[] args) throws IOException {
        // create a new file input stream with the input file specified
        // at the command line

        FileInputStream fin = new FileInputStream(new File("resources/Test1.xls"));
        // create a new org.apache.poi.poifs.filesystem.Filesystem
        POIFSFileSystem poifs = new POIFSFileSystem(fin);
        // get the Workbook (excel part) stream in a InputStream
        InputStream din = poifs.createDocumentInputStream("Workbook");
        // construct out HSSFRequest object
        HSSFRequest req = new HSSFRequest();
        // lazy listen for ALL records with the listener shown above
        req.addListenerForAllRecords(new EventModelXls());
        // create our event factory
        HSSFEventFactory factory = new HSSFEventFactory();
        // process our events based on the document input stream
        factory.processEvents(req, din);
        // once all the events are processed close our file input stream
        fin.close();
        // and our document input stream (don't want to leak these!)
        din.close();
        System.out.println("done.");
    }

}


 Code For Reading .xlsx File 


import java.io.InputStream;
import java.util.Iterator;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;


public class XlsxEventModel {

    public void processOneSheet(String filename) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);


        /********************************
          *rId2 found by processing the Workbook
          * Seems to either be rId# or rSheet#

        */
        InputStream sheet2 = r.getSheet("rId2");
        InputSource sheetSource = new InputSource(sheet2);
        parser.parse(sheetSource);
        sheet2.close();

    }

    public void processAllSheets(String filename) throws Exception {
        OPCPackage pkg = OPCPackage.open(filename);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();
        XMLReader parser = fetchSheetParser(sst);
        XSSFReader.SheetIterator sheets = (XSSFReader.SheetIterator) r.getSheetsData();
        // Iterator<InputStream> sheets = r.getSheetsData();

        while (sheets.hasNext()) {
            System.out.println("Processing new sheet:\n");
            InputStream sheet = sheets.next();
            System.out.println("SheetName" + sheets.getSheetName());
            InputSource sheetSource = new InputSource(sheet);
            parser.parse(sheetSource);
            sheet.close();
            System.out.println("");
        }
    }


    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        /****************************************
         * This Part is commented out because This is required
         * SAX 2(Apache SAX perser) parser and need xerces-2.8.0.jar
         * If xerces-2.8.0.jar is available then commented out
         * another SAX(JRE default) parser part and uncomment this.

         *
         *
         * XMLReader parser =
         *        XMLReaderFactory.createXMLReader(
         *          "org.apache.xerces.parsers.SAXParser");
         *
         */

        XMLReader parser =
                XMLReaderFactory.createXMLReader(
                "com.sun.org.apache.xerces.internal.parsers.SAXParser");
        ContentHandler handler = new SheetHandler(sst);
        parser.setContentHandler(handler);
        return parser;
    }


    /**
     * See org.xml.sax.helpers.DefaultHandler javadocs
     */

    private static class SheetHandler extends DefaultHandler {

        private SharedStringsTable sst;
        private String lastContents;
        private boolean nextIsString;

        private SheetHandler(SharedStringsTable sst) {
            this.sst = sst;
        }

        @Override
        public void startElement(String uri, String localName, String name,
                Attributes attributes) throws SAXException {

            // c => cell
            if (name.equals("c")) {
                // Print the cell reference
                System.out.print(attributes.getValue("r") + " - ");

            // Figure out if the value is an index in the SST
                String cellType = attributes.getValue("t");
                if (cellType != null && cellType.equals("s")) {
                    nextIsString = true;
                } else {
                    nextIsString = false;
                }
            }

            // Clear contents cache
            lastContents = "";
        }

        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException {

            // Process the last contents as required.
            // Do now, as characters() may be called more than once

            if (nextIsString) {
                int idx = Integer.parseInt(lastContents);
                lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
                nextIsString = false;
            }

            // v => contents of a cell
            // Output after we've seen the string contents

            if (name.equals("v")) {
                System.out.println(lastContents);
            }
        }


        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException {
            lastContents += new String(ch, start, length);
        }
    }


    public static void main(String[] args) throws Exception {
        XlsxEventModel howto = new XlsxEventModel();
        howto.processAllSheets(("resources/Test1.xlsx"));
    }
}


4 comments:

  1. Whether we can be able to read both xls and xlsx in same code base using event or event-user model???

    ReplyDelete
    Replies
    1. As I know you cann't use the same codebase for xlsx and xls because underlying structure of this 2 type of excel is totally different.

      Delete
  2. I need to process only first sheet of an XLS file for my application, Currently checking condition in processRecord method to avoid processing of the remaining sheets similar to first example. Is there anyway to stop / remove event Lister after first sheet so that current program can be more efficient.

    ReplyDelete