Results 1 to 9 of 9

Thread: CSV Reader

  1. #1
    yellowledbet is offline Senior Member
    Join Date
    Feb 2011
    Location
    Georgia, USA
    Posts
    122
    Rep Power
    0

    Default CSV Reader

    I have seen many requests on how to read a CSV file. I thought it would be fun to try to write one. I thought it may be useful in the future to share. Code is below. Also, I would love any feedback on how to make it better.

    Java Code:
    /**********EXAMPLE Use of Class************************************************
    		String filePath = "c:\\YOUR_FILE"; //Path to CSV file
    		String[][] csvDataStructure;
    		
    		try{
    			CSVReader cr = new CSVReader(filePath);
    			cr.removeQuotes(); //If quotes surround values in CSV File
    			//Ensure file was parsed correctly
    			//Get Data Structure
    			csvDataStructure = cr.getArray();
    			//Do Something with Data ....
    			//Default toString Method
    			System.out.println(cr.toString());
    		}catch(NullPointerException npe){
    			npe.printStackTrace();
    		}catch(IOException ioe){
    			ioe.printStackTrace();
    		}catch(InvalidCSVException csve){
    			csve.printStackTrace();
    		}
    *************************************************************************/
    import java.io.*;
    
    /**
     * Reads a delimiter-separated text file into a two-dimensional String array.
     * The file is parsed once, when the object is constructed; every row must have
     * the same number of columns as the first row.
     * 
     * Values are split with String.split, so the delimiter is interpreted as a
     * regular expression and quoted fields containing the delimiter are not supported.
     */
    public class CSVReader {
    	
    	private static final int INITIAL_ROWS = 1000; //Number of rows the working array is first allocated with
    	
    	private String[][] CSVArr; //Main Data Structure; holds exactly rowCount rows after parsing
    	private String[] firstRow; //Holds first row of data
    	private File file; //file containing CSV
    	private String delimiter = ","; //Delimiter Used (passed to String.split as a regular expression)
    	private int rowCount = 0; //Number of rows read from the file
    	
    	/**
    	 * Initializes object with string reference to file containing CSV data,
    	 * using a comma as the delimiter, and parses the file.
    	 * @param filePath path of the file to read
    	 * @throws NullPointerException if filePath is null
    	 * @throws IOException if the file cannot be opened or read
    	 * @throws InvalidCSVException if a row has a different number of columns than the first row
    	 */
    	public CSVReader(String filePath) throws NullPointerException, IOException, InvalidCSVException{
    		this(filePath, ",");
    	}
    	
    	/**
    	 * Initializes object with specified delimiter and reference to file, and parses the file.
    	 * @param filePath string reference to file
    	 * @param delim Delimiter used to separate values in CSV file; it is handed to
    	 *        String.split and is therefore treated as a regular expression
    	 * @throws NullPointerException if filePath is null
    	 * @throws IOException if the file cannot be opened or read
    	 * @throws InvalidCSVException if a row has a different number of columns than the first row
    	 */
    	public CSVReader(String filePath, String delim) throws NullPointerException, IOException, InvalidCSVException{
    		file = new File(filePath);
    		delimiter = delim;
    		parseFile();
    	}
    	
    	/**
    	 * Returns the parsed data, one String[] per row of the file.
    	 * The internal array itself is returned, not a copy.
    	 * @return String[][] containing every row that was read
    	 */
    	public String[][] getArray(){
    		return CSVArr;
    	}
    	
    	/**
    	 * Reads csv file into array. Any previously parsed data is replaced, but only
    	 * once the whole file has been read successfully. An empty file yields zero rows
    	 * and an empty header array.
    	 * @throws IOException if the file cannot be opened or read
    	 * @throws InvalidCSVException thrown if there is a discrepancy in # of columns between rows
    	 */
    	public void parseFile() throws IOException, InvalidCSVException{
    		String[][] table = new String[INITIAL_ROWS][];
    		String[] header = new String[0];
    		int count = 0;
    		
    		BufferedReader br = new BufferedReader(new FileReader(file));
    		try{
    			String row;
    			while((row = br.readLine()) != null){
    				//A limit of -1 keeps trailing empty fields, so "a,b," is three columns
    				//on the header row and on data rows alike
    				String[] lineArr = row.split(delimiter, -1);
    				if(count == 0){
    					header = lineArr;
    				}else if(lineArr.length != header.length){
    					throw new InvalidCSVException();
    				}
    				//Double the working array when it is full
    				if(count == table.length){
    					table = resize(table, table.length * 2);
    				}
    				table[count] = lineArr;
    				count++;
    			}
    		}finally{
    			//Always release the file handle, even when parsing fails
    			br.close();
    		}
    		
    		//Publish the result only after the whole file parsed cleanly
    		firstRow = header;
    		rowCount = count;
    		CSVArr = resize(table, count); //drops the unused rows at the end
    	}
    	
    	/**
    	 * Copies the row references of source into a new array of the given length.
    	 * @param source array to copy from
    	 * @param newLength length of the returned array
    	 * @return new array holding the first min(source.length, newLength) rows of source
    	 */
    	private static String[][] resize(String[][] source, int newLength){
    		String[][] resized = new String[newLength][];
    		System.arraycopy(source, 0, resized, 0, Math.min(source.length, newLength));
    		return resized;
    	}
    	
    	/**
    	 * Returns first row of CSV file as String array, presumably headers.
    	 * @return String[] holding the first row; empty if the file had no lines
    	 */
    	public String[] getHeaders(){
    		return firstRow;
    	}
    	
    	/**
    	 * Returns simple string formatted representation of data structure
    	 * @return String representing CSV array. Each row is prefixed with its index,
    	 *         rows are separated by line break, and every value is followed by a tab
    	 */
    	@Override
    	public String toString(){
    		String lineBreak = System.getProperty("line.separator");
    		StringBuilder sb = new StringBuilder();
    		for(int r = 0; r < rowCount; r++){
    			sb.append(r).append(": ");
    			for(int c = 0; c < CSVArr[r].length; c++){
    				sb.append(CSVArr[r][c]).append("\t");
    			}
    			sb.append(lineBreak);
    		}
    		return sb.toString();
    	}
    	
    	/**
    	 * Removes every occurrence of the given text from each value. Presumably used
    	 * for quotes used to encapsulate string values.
    	 * @param separators text to remove (matched literally, not as a regular expression)
    	 */
    	public void removeCharacters(String separators){
    		for(int r = 0; r < rowCount; r++){
    			for(int c = 0; c < CSVArr[r].length; c++){
    				CSVArr[r][c] = CSVArr[r][c].replace(separators, "");
    			}
    		}
    	}
    	
    	/**
    	 * Convenience method for removing double quotes from every value.
    	 */
    	public void removeQuotes(){
    		removeCharacters("\"");
    	}
    	
    }
    InvalidCSVException class
    Java Code:
    /**
     * Thrown when there is a discrepancy between the number of columns
     * in different rows when parsing a CSV file.
     */
    public class InvalidCSVException extends Exception {
    
    	private static final long serialVersionUID = 1L;
    
    	/**
    	 * Creates the exception without a detail message.
    	 */
    	public InvalidCSVException(){
    		super();
    	}
    
    	/**
    	 * Creates the exception with a detail message, e.g. the offending row and
    	 * the column counts that were found and expected.
    	 * @param message detail message, available through getMessage()
    	 */
    	public InvalidCSVException(String message){
    		super(message);
    	}
    
    }

  2. #2
    kjkrum's Avatar
    kjkrum is offline Senior Member
    Join Date
    Apr 2011
    Location
    Tucson, AZ
    Posts
    1,060
    Rep Power
    6

    Default

    The problem with using String.split() this way is there's no way to literally represent a delimiter character.

    In my own CSVUtils class, I scan the string character by character with this logic:

    Java Code:
    if a flag indicates that the previous character was an escape character
        if the current character is the escape character or the delimiter character
            append it to the current token as a literal
        else throw an exception indicating a malformed escape sequence
    else if the current character is the escape character
        set the flag indicating the next character is escaped
    else if the current character is the delimiter character
        start a new token
    else
        append the current character to the current token
    Get in the habit of using standard Java naming conventions!

  3. #3
    DarrylBurke's Avatar
    DarrylBurke is offline Forum Police
    Join Date
    Sep 2008
    Location
    Madgaon, Goa, India
    Posts
    11,458
    Rep Power
    20

    Default

    My quick reaction: I feel the constructor of InvalidCSVException should accept a parameter String message, and where the exception is thrown, I would assemble a message something like
    Java Code:
    "Columns found: " + lineArr.length + " Expected: " + firstRow.length + "\n" + "\"" + row + "\""
    That could give valuable feedback to facilitate manual correction of the CSV file.

    Also, I think "MalformedCSVException" may be a little more descriptive, but InvalidCSVException is also fine.

    Another point is that using System#arraycopy(...) or Arrays#copyOf(...), which uses arraycopy under the covers, can both improve the overall readability of your code and improve performance.

    db

  4. #4
    yellowledbet is offline Senior Member
    Join Date
    Feb 2011
    Location
    Georgia, USA
    Posts
    122
    Rep Power
    0

    Default

    Thank you both! I will see if I can work your suggestions into my code and post a revised class.

  5. #5
    yellowledbet is offline Senior Member
    Join Date
    Feb 2011
    Location
    Georgia, USA
    Posts
    122
    Rep Power
    0

    Default

    attempt 2

    the logic seemed to get a little messy, but it seemed to work on the test data sets i found. thanks for the suggestions and more feedback is always appreciated.

    Java Code:
    /**********EXAMPLE Use of Class************************************************
    		String filePath = "c:\\YOUR_FILE"; //Path to CSV file
    		String[][] csvDataStructure;
    		
    		try{
    			CSVReader cr = new CSVReader(filePath);
    			//Get Data Structure
    			csvDataStructure = cr.getArray();
    			//Do Something with Data ....
    			//Default toString Method
    			System.out.println(cr.toString());
    		}catch(NullPointerException npe){
    			npe.printStackTrace();
    		}catch(IOException ioe){
    			ioe.printStackTrace();
    		}catch(MalformedCSVException csve){
    			csve.printStackTrace();
    		}
    *************************************************************************/
    import java.io.*;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    
    /**
     * Reads a character-delimited text file into a two-dimensional String array.
     * The file is parsed once, when the object is constructed; every row must have
     * the same number of fields as the first row.
     * 
     * A field may be surrounded by the escape character (a double quote by default)
     * so that it can contain the delimiter. Inside such a field, two consecutive
     * escape characters stand for one literal escape character. A quoted field must
     * be closed on the line it was opened on; line breaks inside fields are not supported.
     */
    public class CSVReader {
    	
    	private static final int INITIAL_ROWS = 1000; //Number of rows the working array is first allocated with
    	
    	private String[][] CSVArr; //Main Data Structure; holds exactly rowCount rows after parsing
    	private File file; //file containing CSV
    	private char delimiter = ','; //Delimiter Used
    	private int rowCount = 0; //Number of rows read from the file
    	private char escapeCharacter = '"'; //character that surrounds text containing delimiters
    	
    	
    	/**
    	 * Initializes object with string reference to file containing CSV data, using a
    	 * comma as delimiter and a double quote as escape character, and parses the file.
    	 * @param filePath path of the file to read
    	 * @throws NullPointerException if filePath is null
    	 * @throws IOException if the file cannot be opened or read
    	 * @throws MalformedCSVException if a line cannot be parsed
    	 */
    	public CSVReader(String filePath) throws NullPointerException, IOException, MalformedCSVException{
    		this(filePath, ',');
    	}
    	
    	/**
    	 * Initializes object with specified delimiter and reference to file, using a
    	 * double quote as escape character, and parses the file.
    	 * @param filePath string reference to file
    	 * @param delim Delimiter used to separate values in CSV file
    	 * @throws NullPointerException if filePath is null
    	 * @throws IOException if the file cannot be opened or read
    	 * @throws MalformedCSVException if a line cannot be parsed
    	 */
    	public CSVReader(String filePath, char delim) throws NullPointerException, IOException, MalformedCSVException{
    		this(filePath, delim, '"');
    	}
    	
    	/**
    	 * Initializes object with specified delimiter, escape character and reference
    	 * to file, and parses the file.
    	 * @param filePath string reference to file
    	 * @param delim Delimiter used to separate values in CSV file
    	 * @param escapeChar character that surrounds fields containing the delimiter
    	 * @throws NullPointerException if filePath is null
    	 * @throws IOException if the file cannot be opened or read
    	 * @throws MalformedCSVException if a line cannot be parsed
    	 */
    	public CSVReader(String filePath, char delim, char escapeChar) throws NullPointerException, IOException, MalformedCSVException{
    		file = new File(filePath);
    		delimiter = delim;
    		this.escapeCharacter = escapeChar;
    		parseFile();
    	}
    	
    	
    	/**
    	 * Returns the parsed data, one String[] per line of the file.
    	 * The internal array itself is returned, not a copy.
    	 * @return String[][] containing every row that was read
    	 */
    	public String[][] getArray(){
    		return CSVArr;
    	}
    	
    	/**
    	 * Reads csv file into array. Any previously parsed data is replaced, but only
    	 * once the whole file has been read successfully. An empty file yields zero rows.
    	 * @throws IOException if the file cannot be opened or read
    	 * @throws MalformedCSVException thrown if there is a discrepancy in # of columns
    	 *         between rows, or if a quoted field is not closed on its line. The message
    	 *         contains the 1-based number of the offending line.
    	 */
    	public void parseFile() throws IOException, MalformedCSVException{
    		String[][] table = new String[INITIAL_ROWS][];
    		int count = 0;
    		
    		BufferedReader br = new BufferedReader(new FileReader(file));
    		try{
    			String lineString;
    			while((lineString = br.readLine()) != null){
    				int lineNumber = count + 1;
    				String[] row = parseLine(lineString, lineNumber);
    				if(count > 0 && table[0].length != row.length){
    					String error = "Unable to parse line " + lineNumber
    							+ ": columns found: " + row.length
    							+ ", expected: " + table[0].length;
    					throw new MalformedCSVException(error);
    				}
    				//Double Array size if size has Reached Max
    				if(count == table.length){
    					table = Arrays.copyOf(table, table.length * 2);
    				}
    				table[count] = row;
    				count++;
    			}
    		}finally{
    			//Always release the file handle, even when parsing fails
    			br.close();
    		}
    		
    		//Publish the result only after the whole file parsed cleanly
    		rowCount = count;
    		CSVArr = Arrays.copyOf(table, count); //Remove Empty Rows
    	}
    	
    	/**
    	 * Splits one line of the file into its fields. Fields are collected directly,
    	 * so empty fields (leading, trailing or quoted) are always preserved and no
    	 * text inside a field can be mistaken for a separator.
    	 * @param line text of the line, without line terminator
    	 * @param lineNumber 1-based number of the line, used in error messages
    	 * @return the fields of the line; always at least one element
    	 * @throws MalformedCSVException if a quoted field is still open at the end of the line
    	 */
    	private String[] parseLine(String line, int lineNumber) throws MalformedCSVException{
    		List<String> fields = new ArrayList<String>();
    		StringBuilder field = new StringBuilder();
    		boolean inQuotes = false; //local, so an unbalanced quote cannot affect the next line
    		
    		for(int i = 0; i < line.length(); i++){
    			char c = line.charAt(i);
    			if(c == escapeCharacter){
    				boolean doubled = inQuotes && i + 1 < line.length() && line.charAt(i + 1) == escapeCharacter;
    				if(doubled){
    					//Two escape characters inside a quoted field are one literal character
    					field.append(c);
    					i++;
    				}else{
    					inQuotes = !inQuotes;
    				}
    			}else if(c == delimiter && !inQuotes){
    				fields.add(field.toString());
    				field.setLength(0);
    			}else{
    				field.append(c);
    			}
    		}
    		
    		if(inQuotes){
    			throw new MalformedCSVException("Unable to parse line " + lineNumber
    					+ ": unterminated quoted field");
    		}
    		//The text after the last delimiter is a field too, even when it is empty
    		fields.add(field.toString());
    		return fields.toArray(new String[fields.size()]);
    	}
    		
    	
    	/**
    	 * Returns first row of CSV file as String array, presumably headers.
    	 * @return String[] holding the first row; empty if the file had no lines
    	 */
    	public String[] getHeaders(){
    		if(rowCount == 0){
    			return new String[0];
    		}
    		return CSVArr[0];
    	}
    	
    	
    	/**
    	 * Returns simple string formatted representation of data structure
    	 * @return String representing CSV array. Each row is prefixed with its index,
    	 *         rows are separated by line break, and every value is followed by a tab
    	 */
    	@Override
    	public String toString(){
    		String lineBreak = System.getProperty("line.separator");
    		StringBuilder sb = new StringBuilder();
    		for(int r = 0; r < rowCount; r++){
    			sb.append(r).append(": ");
    			for(int c = 0; c < CSVArr[r].length; c++){
    				sb.append(CSVArr[r][c]).append("\t");
    			}
    			sb.append(lineBreak);
    		}
    		return sb.toString();
    	}
    	
    }
    Java Code:
    /**
     * Checked exception carrying a description of why CSV data could not be parsed.
     */
    public class MalformedCSVException extends Exception {
    
    	/**
    	 * Creates the exception with the given detail message.
    	 * @param mis detail message, available through getMessage()
    	 */
    	public MalformedCSVException(String mis) {
    		super(mis);
    	}
    
    }

  6. #6
    pbrockway2 is offline Moderator
    Join Date
    Feb 2009
    Location
    New Zealand
    Posts
    4,585
    Rep Power
    12

    Default

    parseFile()'s "InvalidCSVException thrown if there is a discrepancy in # of columns between rows" is a bit of a restriction. Why not return a "ragged" array where each line array contains the actual number of elements contained in that line? The logic you currently apply to the rows with increaseArray() might have to be applied to the row elements as well, which suggests a separate GrowableArray class - or use a List.

    It might be more flexible for the constructor to take an InputStream and/or Reader argument instead of a filename string so the class can be used with URLs, the contents of StringBuilder instances etc.

  7. #7
    yellowledbet is offline Senior Member
    Join Date
    Feb 2011
    Location
    Georgia, USA
    Posts
    122
    Rep Power
    0

    Default

    I thought about allowing ragged arrays and it would actually work like that, if I just removed the if statement comparing the current row against the first row, but I thought that csv files were supposed to be tabular so the columns all lined up, there would be no guarantee of that if I removed that condition. I could provide a flag that allows for it. What do you think?

    The suggestion about allowing input streams is a great one. I will see what I can come up with.

  8. #8
    pbrockway2 is offline Moderator
    Join Date
    Feb 2009
    Location
    New Zealand
    Posts
    4,585
    Rep Power
    12

    Default

    I thought that csv files were supposed to be tabular so the columns all lined up
    You are quite right. I just checked and this requirement is, eg, point 4 of RFC 4180.

  9. #9
    kjkrum's Avatar
    kjkrum is offline Senior Member
    Join Date
    Apr 2011
    Location
    Tucson, AZ
    Posts
    1,060
    Rep Power
    6

    Default

    I didn't look too closely at your code, but your approach seems different from the one I suggested. My method of representing the delimiter character literally is to precede it with an escape character, such as '\'. So if I was using ',' as my delimiter and wanted to represent the string "Hello, world!", I would store it as "Hello\, world!" (Actually, my CSVUtils class takes care of this automatically.)

    I see that one of your goals is to follow RFC4180. That was decidedly not one of my goals, so my class may be of limited use to you. It doesn't care if the columns are ragged, allows lines to end with delimiters without producing an empty field, and makes no provision for a newline within a field. Also, quotation marks around a field would be included literally in the field.

    Here it is anyway:

    Java Code:
    package krum.util;
    
    import java.util.LinkedList;
    
    
    
    /**
     * Static helpers for splitting and joining delimited strings in which the
     * delimiter and the escape character are represented literally by preceding
     * them with the escape character.
     */
    public class CSVUtils {
    	
    	private CSVUtils() {
    		// not instantiable
    	}
    	
    	/**
    	 * Splits a delimited string into fields, taking escape sequences into account.
    	 * Valid escape sequences are the escape character followed by the delimiter,
    	 * or the escape character followed by itself.  It is allowed, but not
    	 * required, that the last field be terminated by the delimiter; as a
    	 * consequence an empty last field is never reported.
    	 * 
    	 * @param csv the delimited string to split
    	 * @param delimiter character that separates fields
    	 * @param escape character that makes the following delimiter or escape character literal
    	 * @return the fields with escape sequences resolved; empty if csv is empty
    	 * @throws IllegalArgumentException if the escape character is followed by anything
    	 *         other than the delimiter or itself, or is the last character of csv
    	 */
    	public static String[] split(String csv, char delimiter, char escape) {
    		LinkedList<String> fields = new LinkedList<String>();
    		boolean inEscape = false; // true if prev char was escape
    		StringBuilder builder = new StringBuilder();
    		for(int i = 0; i < csv.length(); ++i) {
    			char c = csv.charAt(i);
    			if(inEscape) {
    				if(c != escape && c != delimiter) {
    					throw new IllegalArgumentException("invalid escape sequence");
    				}
    				builder.append(c);
    				inEscape = false;
    			}
    			else if(c == escape) {
    				inEscape = true;
    			}
    			else if(c == delimiter) {
    				fields.add(builder.toString());
    				builder.setLength(0);
    			}
    			else {
    				builder.append(c);
    			}
    		}
    		
    		// an escape character with nothing after it is not a valid escape sequence
    		if(inEscape)
    			throw new IllegalArgumentException("invalid escape sequence");
    		
    		// save the last field if it's not terminated by a delimiter
    		if(builder.length() > 0) 
    			fields.add(builder.toString());
    		
    		return fields.toArray(new String[0]);
    	}
    	
    	
    	/**
    	 * Escapes special characters in fields and joins them into a delimited string.
    	 * No delimiter is written after the last field.
    	 * 
    	 * @param fields the values to join
    	 * @param delimiter character written between fields
    	 * @param escape character written before every delimiter or escape character inside a field
    	 * @return the joined string; empty if fields is empty
    	 */
    	public static String join(String[] fields, char delimiter, char escape) {
    		if(fields.length == 0)
    			return "";
    		StringBuilder builder = new StringBuilder();
    		builder.append(escape(fields[0], delimiter, escape));
    		for(int i = 1; i < fields.length; ++i) {
    			builder.append(delimiter);
    			builder.append(escape(fields[i], delimiter, escape));
    		}
    		return builder.toString();
    	}
    	
    	
    	/**
    	 * Returns field with the escape character inserted before every occurrence
    	 * of the delimiter or of the escape character itself.
    	 */
    	private static String escape(String field, char delimiter, char escape) {
    		StringBuilder builder = new StringBuilder();
    		
    		for(int i = 0; i < field.length(); ++i) {
    			char c = field.charAt(i);
    			if(c == escape || c == delimiter)
    				builder.append(escape);
    			builder.append(c);
    		}
    		
    		return builder.toString();
    	}
    
    }
    Get in the habit of using standard Java naming conventions!

Similar Threads

  1. Buffered reader
    By franckboy in forum New To Java
    Replies: 1
    Last Post: 02-02-2011, 08:16 PM
  2. Barcode Reader
    By kaotic101 in forum New To Java
    Replies: 3
    Last Post: 01-03-2011, 06:10 PM
  3. RSS Reader
    By dage_001 in forum Networking
    Replies: 16
    Last Post: 04-25-2010, 02:41 PM
  4. Buffered Reader
    By ilovepolluting in forum New To Java
    Replies: 2
    Last Post: 02-04-2010, 10:16 AM
  5. help with file reader
    By jason27131 in forum New To Java
    Replies: 1
    Last Post: 08-01-2007, 04:03 AM

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •