001    /*
002     * CSVReader.java
003     * 
004     * Copyright (C) 2005 Anupam Sengupta (anupamsg@users.sourceforge.net) 
005     * 
006     * This program is free software; you can redistribute it and/or 
007     * modify it under the terms of the GNU General Public License 
008     * as published by the Free Software Foundation; either version 2 
009     * of the License, or (at your option) any later version. 
010     * 
011     * This program is distributed in the hope that it will be useful, 
012     * but WITHOUT ANY WARRANTY; without even the implied warranty of 
013     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 
014     * GNU General Public License for more details. 
015     * 
016     * You should have received a copy of the GNU General Public License
017     * along with this program; if not, write to the Free Software 
018     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
019     *
020     * Version $Revision: 1.3 $
021     */
022    package net.sf.anupam.csv;
023    
024    import com.Ostermiller.util.CSVParse;
025    import com.Ostermiller.util.ExcelCSVParser;
026    import org.apache.commons.collections.CollectionUtils;
027    import org.apache.commons.logging.Log;
028    import org.apache.commons.logging.LogFactory;
029    
030    import java.io.IOException;
031    import java.io.Reader;
032    import java.util.ArrayList;
033    import java.util.Iterator;
034    import java.util.List;
035    import java.util.NoSuchElementException;
036    
037    /**
038     * Reads a CSV file and parses the individual fields for each CSV record in the
039     * file. The default delimiter is assumed to be the <code>,</code> (comma).
040     * <p/>
041     * <p/>
042     * The class uses the CSV Parser engines from <a
043     * href="http://ostermiller.org/utils/" target="_blank">Steven Ostermiller's
044     * site</a>.
045     * </p>
046     *
047     * @author Anupam Sengupta
048     * @version $Revision: 1.3 $
049     * @see com.Ostermiller.util.CSVParse
050     * @since 1.5
051     */
052    class CSVReader implements Iterable<List<String>> {
053    
054        /**
055         * Logger to use.
056         */
057        private static final Log LOG = LogFactory.getLog(CSVReader.class);
058    
059        /**
060         * The CSV parser engine.
061         */
062        private CSVParse parser;
063    
064        /**
065         * Flag which indicates whether the reader has read all the records.
066         */
067        private boolean readingComplete;
068    
069        /**
070         * Flag which indicates whether the CSV file has a header row.
071         */
072        private boolean headerPresent;
073    
074        /**
075         * Constructor which accepts a reader on the CSV stream to parse. The
076         * presence of a CSV header row is also specified. If present, the header
077         * row will be skipped.
078         *
079         * @param csvReader     the CSV stream reader from which to parse
080         * @param headerPresent indicates whether the CSV stream has a header record
081         */
082        public CSVReader(final Reader csvReader, final boolean headerPresent) {
083            super();
084            this.headerPresent = headerPresent;
085    
086            parser = new ExcelCSVParser(csvReader);
087    
088        }
089    
090        /**
091         * Releases all system resources.
092         */
093        public void close() {
094            try {
095                if (parser != null) {
096                    parser.close();
097                    LOG.debug("Closed the CSV Reader");
098                }
099            } catch (final IOException e) {
100                // Do nothing
101            } finally {
102                parser = null;
103            }
104        }
105    
106        /**
107         * Finalizes this CSV reader and closes the IO connections.
108         *
109         * @throws Throwable thrown if the finalization fails.
110         * @see Object#finalize()
111         */
112        @Override
113        protected void finalize() throws Throwable {
114            super.finalize();
115            close();
116        }
117    
118        /**
119         * Returns an iterator over the parsed lines. The iterator returns a list of
120         * the CSV field values as a single value over each iteration.
121         *
122         * @return an iterator over the lines.
123         */
124        public Iterator<List<String>> iterator() {
125            return new LineIterator();
126        }
127    
128        // ~ Inner Classes
129        // ----------------------------------------------------------
130    
131        /**
132         * Inner iterator class to provide the Iterable interface to the reader.
133         */
134        private class LineIterator implements Iterator<List<String>> {
135            // ~ Methods
136            // ------------------------------------------------------------
137    
138            /**
139             * The parsed CSV field values.
140             */
141            private String[] parsedValues;
142    
143            /**
144             * Flag indicating whether the previous line was read.
145             */
146            private boolean haveReadPreviousLine;
147    
148            /**
149             * Default Constructor.
150             */
151            public LineIterator() {
152                super();
153                if (isHeaderPresent()) {
154                    readOneLine();
155                }
156            }
157    
158            /**
159             * Returns <code>true</code> if there is at least one more parsed CSV line.
160             *
161             * @return <code>true></code> if there is at least one more parsed line
162             * @see java.util.Iterator#hasNext()
163             */
164            public boolean hasNext() {
165                if (isReadingComplete()) {
166                    return false;
167                }
168    
169                if (!haveReadPreviousLine) {
170                    readOneLine();
171                    haveReadPreviousLine = true;
172                }
173                return !isReadingComplete();
174            }
175    
176            /**
177             * Returns a list of the CSV field values for the current line.
178             *
179             * @return the next list of parsed CSV field values
180             * @see java.util.Iterator#next()
181             */
182            public List<String> next() {
183    
184                if (!haveReadPreviousLine) {
185                    readOneLine();
186                } else {
187                    haveReadPreviousLine = false;
188                }
189    
190                if (isReadingComplete()) {
191                    throw new NoSuchElementException();
192                }
193    
194                final List<String> valueList = new ArrayList<String>(
195                        parsedValues.length);
196                CollectionUtils.addAll(valueList, parsedValues);
197    
198                return valueList;
199    
200            }
201    
202            /**
203             * Reads one CSV line using the CSV parser engine and stores the parsed
204             * line fields.
205             */
206            private void readOneLine() {
207                try {
208                    parsedValues = getParser().getLine();
209                    if (parsedValues == null) {
210                        readingIsComplete();
211                    }
212                } catch (final IOException e) {
213                    LOG.warn("Error in reading a line from the CSV stream ", e);
214                    readingIsComplete();
215                }
216    
217            }
218    
219            /**
220             * This method is not supported.
221             *
222             * @see java.util.Iterator#remove()
223             */
224            public void remove() {
225                LOG
226                        .debug("Invalid call to the unsupported remove() method on the iterator");
227                throw new UnsupportedOperationException(
228                        "This method is not supported");
229            }
230        }
231    
232        /**
233         * Indicates whether the header row is present or not.
234         *
235         * @return Returns <code>true</code> if the header row is present
236         */
237        public boolean isHeaderPresent() {
238            return this.headerPresent;
239        }
240    
241        /**
242         * Indicates whether the reader has read all CSV lines.
243         *
244         * @return Returns <code>true</code> if all CSV lines have been read
245         */
246        public boolean isReadingComplete() {
247            return this.readingComplete;
248        }
249    
250        /**
251         * Sets the flag to denote that all lines have been read.
252         */
253        protected void readingIsComplete() {
254            this.readingComplete = true;
255        }
256    
257        /**
258         * Returns the internal CSV parser engine instance for this reader.
259         *
260         * @return Returns the parser instance
261         */
262        protected CSVParse getParser() {
263            return this.parser;
264        }
265    }