从CSV文件中删除空行和列 - C#

时间:2017-06-20 12:57:38

标签: c#

我想“清理”CSV 文件:

  • 删除空行
  • 删除空列

这些行或列并非完全为空,而是只包含空的带引号字段,例如: "","","","","","","","","","","","","","", (以行的形式)或者 "","","","","","","","","","","","", (以行的形式)或者

"",

"",

"",

"",

"",

"",

"",

(以列形式)

这些行或列可以位于CSV文件中的任何位置。

到目前为止我所拥有的:

/// <summary>
/// Click handler: exports one worksheet of an Excel workbook to a CSV file and
/// then runs the empty-row clean-up over that CSV file.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button1_Click(object sender, EventArgs e)
        {
            const string workbookPath = @"XXXXX.xlsx";
            const string sheetName = "Sample";
            const string csvPath = @"C:\Users\xxxx\xls_test\XXXX.csv";

            // Step 1: produce the CSV file from the worksheet.
            ExcelToCSVCoversion(workbookPath, sheetName, csvPath);

            // Step 2: rewrite the CSV file in place without its empty rows.
            DeleteEmptyRoadFromCSV(csvPath);
        }

        /// <summary>
        /// Loads every row returned by <c>SELECT * FROM [worksheetName$]</c> from an
        /// Excel workbook (via the ACE OLE DB provider) and writes the rows to a text
        /// file, one line per row, each cell wrapped in double quotes and followed by
        /// a comma.
        /// </summary>
        /// <param name="sourceFile">Path of the Excel workbook; used as the connection string's Data Source.</param>
        /// <param name="worksheetName">Worksheet name without the trailing <c>$</c>.</param>
        /// <param name="targetFile">Path of the file to write.</param>
        /// <remarks>
        /// Exceptions are not propagated: they are printed to the console and the
        /// method then waits for a line of console input.
        /// NOTE(review): with <c>HDR=YES</c> the first worksheet row is presumably
        /// consumed as column names and therefore never written - confirm.
        /// </remarks>
        static void ExcelToCSVCoversion(string sourceFile, string worksheetName,
            string targetFile)
        {
            string connectionString = @"Provider =Microsoft.ACE.OLEDB.12.0;Data Source=" + sourceFile
                + @";Extended Properties=""Excel 12.0 Xml;HDR=YES""";

            try
            {
                // `using` disposes each resource only if it was actually created, so a
                // failure in Open() or in a constructor can no longer trigger a
                // NullReferenceException during clean-up.
                using (OleDbConnection connection = new OleDbConnection(connectionString))
                {
                    connection.Open();

                    using (OleDbCommand command = new OleDbCommand("SELECT * FROM [" + worksheetName + "$]",
                                                                   connection))
                    using (StreamWriter writer = new StreamWriter(targetFile))
                    using (OleDbDataAdapter dataAdapter = new OleDbDataAdapter(command))
                    {
                        command.CommandType = CommandType.Text;

                        DataTable dataTable = new DataTable();
                        dataAdapter.Fill(dataTable);

                        foreach (DataRow dataRow in dataTable.Rows)
                        {
                            foreach (DataColumn dataColumn in dataTable.Columns)
                            {
                                // Double any embedded quote so the quoted field stays well-formed.
                                string cellText = dataRow[dataColumn].ToString().Replace("\"", "\"\"");

                                // Written cell by cell instead of concatenating a row string;
                                // the trailing comma after the last cell is kept on purpose.
                                writer.Write("\"" + cellText + "\",");
                            }
                            writer.WriteLine();
                        }
                    }
                }

                // Reported only after the writer has been flushed and closed.
                Console.WriteLine();
                Console.WriteLine("The excel file " + sourceFile + " has been converted " +
                                  "into " + targetFile + " (CSV format).");
                Console.WriteLine();
            }
            catch (Exception exception)
            {
                Console.WriteLine(exception.ToString());
                Console.ReadLine();
            }
        }

        /// <summary>
        /// Rewrites a CSV file in place, dropping every line that carries no data,
        /// i.e. a line made up only of separators, whitespace and empty quoted
        /// fields such as <c>"","","",</c>.
        /// </summary>
        /// <param name="fileName">Path of the CSV file to read and overwrite.</param>
        static void DeleteEmptyRoadFromCSV(string fileName)
        {
            // ReadAllLines loads the whole file first, so writing back to the same
            // path afterwards is safe.
            var nonEmptyLines = File.ReadAllLines(fileName)
                                    .Where(line => !IsBlankCsvLine(line))
                                    .ToList();

            File.WriteAllLines(fileName, nonEmptyLines);
        }

        /// <summary>
        /// Returns true when a single CSV line contains no cell content at all.
        /// </summary>
        /// <param name="line">One physical line of the file.</param>
        /// <returns>
        /// False as soon as any character that counts as data is found; true otherwise
        /// (including for the empty string).
        /// </returns>
        private static bool IsBlankCsvLine(string line)
        {
            bool insideQuotes = false;

            for (int i = 0; i < line.Length; i++)
            {
                char current = line[i];

                if (current == '"')
                {
                    // A doubled quote inside a quoted field is a literal quote character,
                    // which is real content.
                    if (insideQuotes && i + 1 < line.Length && line[i + 1] == '"')
                    {
                        return false;
                    }

                    insideQuotes = !insideQuotes;
                }
                else if (insideQuotes)
                {
                    // Inside quotes even a comma is data; only whitespace is ignored.
                    if (!char.IsWhiteSpace(current))
                    {
                        return false;
                    }
                }
                else if (current != ',' && !char.IsWhiteSpace(current))
                {
                    return false;
                }
            }

            return true;
        }

最后,我尝试采用了这个问题中的思路: Remove Blank rows from csv c#。但我的输出完全没有变化。

欢迎任何帮助!

谢谢。

2 个答案:

答案 0 :(得分:1)

您可以在保存 CSV 之前先从表中删除空的列/行。下面的方法未经测试,但您应该能明白其中的思路。

 /// <summary>
 /// Loads every row returned by <c>SELECT * FROM [worksheetName$]</c> from an Excel
 /// workbook (via the ACE OLE DB provider), removes rows and columns whose cells
 /// are all empty, and writes what remains to a text file, one line per row, each
 /// cell wrapped in double quotes and followed by a comma.
 /// </summary>
 /// <param name="sourceFile">Path of the Excel workbook; used as the connection string's Data Source.</param>
 /// <param name="worksheetName">Worksheet name without the trailing <c>$</c>.</param>
 /// <param name="targetFile">Path of the file to write.</param>
 /// <remarks>
 /// A cell counts as empty when its <c>ToString()</c> value is null or the empty
 /// string. Exceptions are not propagated: they are printed to the console and
 /// the method then waits for a line of console input.
 /// </remarks>
 static void ExcelToCSVCoversion(string sourceFile, string worksheetName,
       string targetFile)
    {
        string connectionString = @"Provider =Microsoft.ACE.OLEDB.12.0;Data Source=" + sourceFile
            + @";Extended Properties=""Excel 12.0 Xml;HDR=YES""";

        try
        {
            // `using` disposes each resource only if it was actually created, so a
            // failure in Open() or in a constructor can no longer trigger a
            // NullReferenceException during clean-up.
            using (OleDbConnection connection = new OleDbConnection(connectionString))
            {
                connection.Open();

                using (OleDbCommand command = new OleDbCommand("SELECT * FROM [" + worksheetName + "$]",
                                                               connection))
                using (StreamWriter writer = new StreamWriter(targetFile))
                using (OleDbDataAdapter dataAdapter = new OleDbDataAdapter(command))
                {
                    command.CommandType = CommandType.Text;

                    DataTable dataTable = new DataTable();
                    dataAdapter.Fill(dataTable);

                    DataColumn[] allColumns = dataTable.Columns.Cast<DataColumn>().ToArray();

                    // Pass 1: delete rows in which every cell is empty.
                    DataRow[] emptyRows = dataTable.Select()
                        .Where(candidate => allColumns.All(
                            column => string.IsNullOrEmpty(candidate[column].ToString())))
                        .ToArray();
                    foreach (DataRow emptyRow in emptyRows)
                    {
                        emptyRow.Delete();
                    }

                    // Commit the deletions before the column scan so it only sees surviving rows.
                    dataTable.AcceptChanges();

                    // Pass 2: remove columns that are empty in every surviving row.
                    // The row array is fetched once instead of once per column.
                    DataRow[] survivingRows = dataTable.Select();
                    DataColumn[] emptyColumns = allColumns
                        .Where(column => survivingRows.All(
                            survivor => string.IsNullOrEmpty(survivor[column].ToString())))
                        .ToArray();
                    foreach (DataColumn emptyColumn in emptyColumns)
                    {
                        dataTable.Columns.Remove(emptyColumn);
                    }
                    dataTable.AcceptChanges();

                    foreach (DataRow dataRow in dataTable.Rows)
                    {
                        foreach (DataColumn dataColumn in dataTable.Columns)
                        {
                            // Double any embedded quote so the quoted field stays well-formed.
                            string cellText = dataRow[dataColumn].ToString().Replace("\"", "\"\"");
                            writer.Write("\"" + cellText + "\",");
                        }
                        writer.WriteLine();
                    }
                }
            }

            // Reported only after the writer has been flushed and closed.
            Console.WriteLine();
            Console.WriteLine("The excel file " + sourceFile + " has been converted " +
                              "into " + targetFile + " (CSV format).");
            Console.WriteLine();
        }
        catch (Exception exception)
        {
            Console.WriteLine(exception.ToString());
            Console.ReadLine();
        }
    }

答案 1 :(得分:0)

请检查以下查询是否正常。我正在获取所有行:

// Keep a line only when neither of its first two comma-separated cells is
// null, empty or whitespace.
var nonEmptyLines = File.ReadAllLines(FileName)
                        .Where(line => line.Split(',')
                                           .Take(2)
                                           .All(cell => !string.IsNullOrWhiteSpace(cell)))
                        .ToList();

我认为您可以使用以下内容:

  // Keep every line that has at least one cell with real content. A cell is
  // treated as empty when nothing but whitespace remains after stripping the
  // surrounding double quotes, so a row such as "","","", is dropped as well.
  // Where (not SkipWhile) is used so blank lines are removed anywhere in the
  // file, not only at the top, and the path variable is passed instead of the
  // File type name.
  // Limitation: the line is split on every comma, so a quoted cell that
  // consists only of commas is also treated as empty.
  var nonEmptyLines = File.ReadAllLines(FileName)
                        .Where(line => line.Split(',')
                                           .Any(cell => !string.IsNullOrWhiteSpace(cell.Trim('"'))))
                        .ToList();