Testing

webTiger Logo Wide

Working With CSV Files in .NET

There will be a point when you need to import data from a CSV (comma-separated values) file, and you’ll realise what should be a straightforward process can become overcomplicated. Part of the reason for this is likely to be that you don’t fully understand the CSV format, because you are just blindly delimiting on any comma you find or because the CSV file you are importing is invalid because the person/program who generated it didn’t properly understand the encoding rules.

I can’t help with the latter case (corrupt files) but here is a class for importing from and exporting to CSV, to take the hard work out of it.

The solution below should be compliant with RFC4180, catering for encapsulated values spanning multiple lines, etc.

namespace My.IO
{
    using System.Collections.Generic;
    using System.IO;
    using System.Text;

    /// <summary>
    /// Provides methods for working with CSV files.
    /// </summary>
    public class Csv
    {
        /// <summary>
        /// Imports data from a CSV file into a form that is broadly 
        /// tabular in nature.
        /// </summary>
        /// <param name="path">The full path of the file.</param>
        /// <returns>A collection of rows of cells.</returns>
        public List<string[]> Import(string path)
        {
            if (!File.Exists(path)) return new List<string[]>();

            using (StreamReader reader = File.OpenText(path))
            {
                return this.Decode(reader.ReadToEnd());
            }
        }

        /// <summary>
        /// Processes raw CSV file content, based on RFC4180, into a form 
        /// that is broadly tabular in nature.</summary>
        /// <param name="data">The raw CSV file data.</param>
        /// <returns>A collection of rows of cells.</returns>
        public List<string[]> Decode(string data)
        {
            if (string.IsNullOrEmpty(data) || data.Trim() == "") 
            {
                return new List<string[]>();
            }

            List<string[]> rows = new List<string[]>();
            List<string> columns = new List<string>();
            StringBuilder cell = new StringBuilder(64);
            bool encapsulated = false;

            for (int i = 0; i < data.Length; i++)
            {
                if (encapsulated && data[i] == '\"')
                {
                    if (i < data.Length)
                    {
                        // Check the speech mark isn't double-escaped 
                        // (in which case it needs to be ignored.)
                        if (data[i + 1] == '\"') i++;
                        else encapsulated = false;
                    }
                    else
                    {
                        encapsulated = false;
                    }
                }
                else if (data[i] == '\"')
                {
                    encapsulated = true;
                }
                else if (!encapsulated && data[i] == ',')
                {
                    columns.Add(cell.ToString());
                    cell = new StringBuilder(64);
                }
                else if (!encapsulated && data[i] == '\n')
                {
                    if (cell.ToString().EndsWith("\r"))
                    {
                        // Discard the carriage return.
                        columns.Add(cell.ToString(0, cell.Length - 1)); 
                    }
                    else
                    {
                        columns.Add(cell.ToString());
                    }

                    cell = new StringBuilder(64);
                    rows.Add(columns.ToArray());
                    columns.Clear();
                }
                else
                {
                    cell.Append(data[i]);
                }
            }

            // Special case where the file only contains one line...
            if (rows.Count == 0 && columns.Count > 0)
            {
                if (cell.ToString() != "" && 
                    columns[columns.Count - 1] != cell.ToString()) 
                {
                    columns.Add(cell.ToString());
                }

                rows.Add(columns.ToArray());
            }

            return rows;
        }

        /// <summary>
        /// Writes data to a CSV file, overwriting the file if it already exists.
        /// </summary>
        /// <param name="path">The full path to the target file.</param>
        /// <param name="table">A collection of rows of cells to write.</param>
        public void Export(string path, List<object[]> table)
        {
            File.WriteAllText(path, this.Encode(table));
        }

        /// <summary>
        /// Returns text equivalent to the supplied tabular data, compliant to RFC4180.
        /// </summary>
        /// <param name="table">The tabular data to encode.</param>
        /// <returns>CSV compliant text version of the data.</returns>
        public string Encode(List<object[]> table)
        {
            if (table == null || table.Count == 0) return string.Empty;

            StringBuilder content = new StringBuilder(1024);

            foreach (object[] row in table)
            {
                foreach (object cell in row)
                {
                    content.Append(this.EncodeCell(cell, row.Length));
                    content.Append(",");
                }

                // remove trailing comma and then add a line-feed
                content.Remove(content.Length - 1, 1);
                content.AppendLine();
            }

            return content.ToString();
        }

        /// <summary>
        /// Encodes the supplied elemental cell data into a form compliant 
        /// with RFC4180.
        /// </summary>
        /// <param name="data">The celll data.</param>
        /// <param name="rowLength">The number of cells in the row.</param>
        /// <returns>Encoded version of the cell.</returns>
        private string EncodeCell(object data, int rowLength)
        {
            string dataAsText = data != null ? data.ToString() : string.Empty;

            if (dataAsText.Contains("\""))
            {
                // speech marks are escaped by putting another one just before them
                dataAsText = dataAsText.Replace("\"", "\"\"");
            }

            if (dataAsText.Contains(",") || 
                dataAsText.Contains("\"") ||
                dataAsText.Contains("\r") || 
                dataAsText.Contains("\n"))
            {
                // data containing special, reserved characters must be 
                // encapsulated in speech marks
                dataAsText = string.Format("\"{0}\"", dataAsText);
            }

            if (string.IsNullOrEmpty(dataAsText) && rowLength == 1)
            {
                // empty entries must be encapsulated in speech marks if 
                // there is only one entry in the row!
                dataAsText = "\"\"";
            }

            return dataAsText;
        }
    }
}Code language: C# (cs)