将格式化的文本文件读入数组

时间:2015-11-30 04:09:24

标签: c arrays

我有一个给定格式的文本文件:

 2078.62        5.69982       -0.17815       -0.04732
 5234.95        8.40361        0.04028        0.10852
 2143.66        5.35245        0.10747       -0.11584
 7216.99        2.93732       -0.18327       -0.20545
 1687.24        3.37211        0.14195       -0.14865
 2065.23        34.0188         0.1828        0.21199
 2664.57        2.91035        0.19513        0.35112
 7815.15        9.48227       -0.11522        0.19523
 5166.16        5.12382       -0.29997       -0.40592
 6777.11        5.53529       -0.37287       -0.43299
 4596.48        1.51918       -0.33986        0.09597
 6720.56        15.4161       -0.00158        -0.0433
 2652.65        5.51849        0.41896       -0.61039

我写了以下函数来读取文件

读取行数

/*
 * Return the number of lines in the file named sn.
 *
 * The count is obtained by running the shell pipeline
 *     wc -l <sn> | cut -d ' ' -f1
 * through popen() and converting each line of its output with atoi();
 * the last converted value is printed to stdout and returned.
 *
 * LCLENGTH and exit_failure() are declared outside this excerpt.
 *
 * NOTE(review): sn is pasted into the shell command unquoted, so a file
 * name containing spaces or shell metacharacters changes the command
 * that is executed.
 */
unsigned int getnumline(const char *sn){
   /* NOTE(review): n is assigned only inside the fgets loop below; if the
    * pipe produces no output, n is printed and returned uninitialised. */
   unsigned int n;
   char lcstring[LCLENGTH];
   FILE *lcpipe;
   char buff[512];
   /* Build the command line; snprintf truncates it to LCLENGTH bytes. */
   snprintf( lcstring, LCLENGTH, 
      "wc -l %s | cut -d ' ' -f1", sn);
   lcpipe = popen( lcstring, "r" );
   if (lcpipe == NULL)
     exit_failure( "popen: " );

   /* Keep the value parsed from the last line the pipeline printed. */
   while(fgets(buff, sizeof(buff), lcpipe)!=NULL){
        n=atoi(buff);
   }
   pclose(lcpipe);
   /* NOTE(review): %d expects an int, but n is an unsigned int (%u). */
   printf("Number of lines in the input file: %d\n", n);
   return n;
}

读取文本文件

/*
 * Allocate a zero-initialised m-by-n matrix of doubles.
 *
 * The result is an array of m row pointers; all rows live in one
 * contiguous block of m*n doubles that starts at matrix[0], so
 * matrix[i] == matrix[0] + i*n.
 *
 * Parameters:
 *   m  number of rows; must be non-zero
 *   n  number of columns
 *
 * Returns the row-pointer array, or NULL if m is 0, if m*n does not fit
 * in size_t, or if either allocation fails (nothing is leaked on
 * failure).
 *
 * Ownership: the caller releases the matrix with free(matrix[0])
 * followed by free(matrix).
 */
double **callocmatrix( unsigned int m, unsigned int n ) {

  double **matrix;
  double *cells;
  size_t count;
  unsigned int i;

  /* With no rows there is no matrix[0] slot to store the data block in. */
  if ( m == 0 )
    return NULL;

  /* Do the multiplication in size_t: unsigned int m*n could wrap. */
  if ( n != 0 && (size_t)m > (size_t)-1 / n )
    return NULL;
  count = (size_t)m * n;

  matrix = calloc( m, sizeof *matrix );
  if ( !matrix )
    return NULL;

  /* Request at least one element so a zero-column matrix still gets a
     valid, freeable matrix[0] instead of depending on calloc(0, ...). */
  cells = calloc( count ? count : 1, sizeof *cells );
  if ( !cells ) {
    free( matrix );   /* do not leak the row-pointer array */
    return NULL;
  }

  for ( i = 0; i < m; i += 1 )
    matrix[i] = cells + (size_t)i * n;

  return matrix;
}

/*
 * Release a matrix laid out the way callocmatrix() builds it: one
 * contiguous data block starting at matrix[0] plus a separately
 * allocated array of row pointers.
 *
 * Both allocations are freed, so the caller must not free matrix again
 * afterwards. Passing NULL is a no-op.
 */
void freematrix( double **matrix ) {

  if ( !matrix )
    return;

  free( matrix[0] );   /* the shared data block */
  free( matrix );      /* the row-pointer array */
}



/* Call-site excerpt: the row count comes from getnumline() and is used to
 * allocate ngal rows of 4 doubles. ngal and sname are declared outside
 * this excerpt. */
double **ellcat;   
ngal = getnumline(sname);
ellcat = callocmatrix( ngal, 4 );
/*
 * Read catlen rows of four whitespace-separated doubles from the text
 * file sn into ellcat[i][0..3], then print every row to stdout.
 *
 * ellcat is indexed as ellcat[i][0..3] for i < catlen, so it presumably
 * must already hold at least catlen rows of 4 doubles, allocated by the
 * caller (nothing is allocated here).
 *
 * If the file cannot be opened, a message is printed to stdout and the
 * program terminates through exit(0).
 */
void readcat( double **ellcat, unsigned int catlen, const char *sn ) {
     unsigned int i;      
     FILE *fp=fopen(sn,"r");
     if(fp == NULL) 
      {
         printf("Error in opening file\n");
         /* NOTE(review): exit status 0 reports success to the caller's shell. */
         exit(0);
      }          
     for (i=0 ; i< catlen ; i++)
     {        
       /* NOTE(review): the fscanf return value is ignored, so a short or
        * malformed file goes unnoticed. */
       fscanf(fp, "%lf %lf %lf %lf", &ellcat[i][0], &ellcat[i][1], &ellcat[i][2], &ellcat[i][3]);        
     }    
     for (i=0 ; i< catlen ; i++)
     {
      /* NOTE(review): printf's %lf expects double values, but the arguments
       * here are addresses (&ellcat[i][k]); the types do not match. */
      printf("x = %lf, y = %lf, e1 = %lf, e2 = %lf\n", &ellcat[i][0], &ellcat[i][1], &ellcat[i][2], &ellcat[i][3]);
     }
     fclose(fp); 
}

ellcat 的所有元素打印出来都是零:

x = 0.000000, y = 0.000000, e1 = 0.000000, e2 = 0.000000
x = 0.000000, y = 0.000000, e1 = 0.000000, e2 = 0.000000
x = 0.000000, y = 0.000000, e1 = 0.000000, e2 = 0.000000
x = 0.000000, y = 0.000000, e1 = 0.000000, e2 = 0.000000
x = 0.000000, y = 0.000000, e1 = 0.000000, e2 = 0.000000
x = 0.000000, y = 0.000000, e1 = 0.000000, e2 = 0.000000
x = 0.000000, y = 0.000000, e1 = 0.000000, e2 = 0.000000
x = 0.000000, y = 0.000000, e1 = 0.000000, e2 = 0.000000
x = 0.000000, y = 0.000000, e1 = 0.000000, e2 = 0.000000
x = 0.000000, y = 0.000000, e1 = 0.000000, e2 = 0.000000

我无法弄清楚我的readcat函数存在什么问题。

P.S. 我必须说明,我是 C 语言的新手。

4 个答案:

答案 0 :(得分:2)

首先,使用外部程序来统计行数有些笨拙。直接统计文件中换行符的数量会更高效(代码更少,占用的系统资源也更少):

/*
 * Count the '\n' characters in the file named f.
 *
 * Returns the number of newline characters read before end of file (or a
 * read error), or 0 if the file cannot be opened.
 */
size_t get_newline_count(char const *f) {
    FILE *stream = fopen(f, "r");
    size_t total = 0;
    int ch;

    if (stream == NULL) {
        return 0;
    }

    /* fgetc returns a negative value (EOF) once nothing more can be read. */
    while ((ch = fgetc(stream)) >= 0) {
        if (ch == '\n') {
            total++;
        }
    }

    fclose(stream);
    return total;
}
unsigned int n;
...
printf("Number of lines in the input file: %d\n", n);

我上面写的函数的返回类型是size_t,所以你想要使用%zu格式说明符来打印它。当您要打印的类型为unsigned int时(如代码中所示),请使用%u;如果您提供了错误类型的参数(%d对应于int),则行为未定义。

fscanf(fp, "%lf %lf %lf %lf", &ellcat[i][0], &ellcat[i][1], &ellcat[i][2], &ellcat[i][3]);

今后请注意:您有责任向我们提供一个完整的 MCVE(最小、完整、可验证的示例)——它应当能够直接编译并重现问题,而不需要我们去填补任何空白。

您没有向我们提供ellcat的声明,因此我们只能推测您可能提供了错误的类型(正如您在上面确实做过的那样)。ellcat必须是指向double的二级指针(即double **),这段代码才是正确的。

printf("x = %lf, y = %lf, e1 = %lf, e2 = %lf\n", &ellcat[i][0], &ellcat[i][1], &ellcat[i][2], &ellcat[i][3])

最后,如另一个答案所示,使用printf打印double(或float,由于默认参数提升,它会被提升为double)时,%lf中的l在这种情况下是多余的;而且"您不需要传递变量的地址"……实际上,您不应该传递地址,因为这意味着您提供了错误的类型,也就是我们前面讨论过的未定义行为。

答案 1 :(得分:1)

使用printf打印值时:

printf("x = %lf, y = %lf, e1 = %lf, e2 = %lf\n",  
        &ellcat[i][0], &ellcat[i][1], &ellcat[i][2], &ellcat[i][3]);
        ^              ^              ^              ^   

您不需要传递变量的地址,只需直接传递变量本身。请删除&运算符。

答案 2 :(得分:1)

除非明确要求您在分配空间、把值读入数组之前先预读一遍文件来确定行数,否则没有理由这样做。您完全可以在同一个函数/读取循环中从文件读取值、分配保存它们的空间,并记录已读取的数据行数。实际上,使用指向指针的指针(二级指针,此处为double **)作为数据类型,就可以很容易地实现这一点。

在您的情况下,通常的做法是先为合理预期的行数分配一个指针数组,然后每读入一行数据就为该行分配空间,直到达到这个初始上限;此后根据需要用realloc追加更多指针,直至把整个文件读入这个由"指向double数组的指针"组成的数组。行数通过指针参数传递给函数,并在函数内更新,这样调用函数(此处为main())就能得到读取的总行数。

满足您要求的一个函数示例如下:

/* Fallback so this snippet also compiles on its own; the driver that
 * uses it defines MAXR (as 64) itself.
 */
#ifndef MAXR
#define MAXR 64
#endif

/* read rows in filename 'fn' that consist of 4 whitespace
 * separated double values into the dynamically allocated
 * array of pointer to array of 4 doubles 'ar', updating the
 * size_t value 'nrow' with the number of rows of data read.
 * if 'fn' is NULL the rows are read from stdin instead.
 * at most MAXR rows are read; a warning is printed to stderr
 * when that limit is reached.
 *
 * returns pointer to allocated array on success, NULL otherwise.
 * on success the caller owns the result: free each (*ar)[i] for
 * i < *nrow, then free *ar itself.
 * on allocation failure everything allocated so far is released,
 * the file is closed, *ar is set to NULL and *nrow to 0.
 */
double **readcat (double ***ar, size_t *nrow, const char *fn)
{
    double tmp[4] = {0.0};                    /* tmp array for values  */
    FILE *fp = fn ? fopen (fn, "r") : stdin;  /* read from fn or stdin */
    if (!fp) {
        fprintf (stderr, "readcat() error: file open failed '%s'.\n", fn);
        return NULL;
    }

    *nrow = 0; /* set index to 0 before anything can fail below */

    /* allocate MAXR pointers to double* */
    if (!(*ar = calloc (MAXR, sizeof **ar))) {
        fprintf (stderr, "readcat() error: virtual memory exhausted.\n");
        goto fail;
    }

    /* read each row in file; stop at the first line that does not
     * yield exactly 4 values (including end of file) */
    while (fscanf (fp, "%lf %lf %lf %lf",
                &tmp[0], &tmp[1], &tmp[2], &tmp[3]) == 4) {

        /* 4 values read, allocate memory in *ar */
        if (!((*ar)[*nrow] = calloc (4, sizeof ***ar))) {
            fprintf (stderr, "readcat() error: virtual memory exhausted.\n");
            goto fail;
        }
        /* copy values to *ar */
        memcpy ((*ar)[*nrow], tmp, sizeof tmp);
        (*nrow)++;  /* increment row index */

        if (*nrow == MAXR) {    /* test rows against max (break/realloc) */
            fprintf (stderr, "readcat() warning: MAXR rows read.\n");
            break;
        }
    }

    if (fp != stdin) fclose (fp);   /* close file */

    return *ar;

fail:
    /* undo partial work so that neither memory nor the stream leaks */
    if (*ar) {
        while (*nrow > 0)
            free ((*ar)[--(*nrow)]);
        free (*ar);
        *ar = NULL;
    }
    *nrow = 0;
    if (fp != stdin) fclose (fp);

    return NULL;
}

注意,上面省略了重新分配的代码:如果达到指针(行)数的上限MAXR,就会退出读取循环。另请注意,由于函数返回指针,您不仅可以根据返回值检查成功/失败,还可以选择将返回值赋给某个指针。

在您的数据上使用该函数的一个简短驱动程序如下:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXR 64

double **readcat (double ***ar, size_t *nrow, const char *fn);

/* Driver: load the catalogue named by the first command-line argument
 * (a NULL file name is passed to readcat when no argument is given),
 * print every row read, then release all memory.
 * Exits with EXIT_FAILURE when readcat returns NULL.
 */
int main (int argc, char **argv) {

    double **catalog = NULL;
    size_t nrows = 0, row;
    char *path = NULL;

    if (argc > 1)
        path = argv[1];

    catalog = readcat (&catalog, &nrows, path);

    /* validate address returned by readcat */
    if (catalog == NULL) {
        fprintf (stderr, "error: readcat returned no values.\n");
        exit (EXIT_FAILURE);
    }

    printf ("\n %zu rows of data read:\n\n", nrows);

    /* print each row of data */
    for (row = 0; row < nrows; row++) {
        const double *v = catalog[row];
        printf ("  %8.2lf  %8.5lf  %8.5lf  %8.5lf\n",
                v[0], v[1], v[2], v[3]);
    }
    putchar ('\n');

    /* free every row first, then the array of row pointers */
    for (row = 0; row < nrows; row++)
        free (catalog[row]);
    free (catalog);

    return 0;
}

使用/输出

$ ./bin/fscanf_4col_dyn dat/float_4col.txt

 13 rows of data read:

   2078.62   5.69982  -0.17815  -0.04732
   5234.95   8.40361   0.04028   0.10852
   2143.66   5.35245   0.10747  -0.11584
   7216.99   2.93732  -0.18327  -0.20545
   1687.24   3.37211   0.14195  -0.14865
   2065.23  34.01880   0.18280   0.21199
   2664.57   2.91035   0.19513   0.35112
   7815.15   9.48227  -0.11522   0.19523
   5166.16   5.12382  -0.29997  -0.40592
   6777.11   5.53529  -0.37287  -0.43299
   4596.48   1.51918  -0.33986   0.09597
   6720.56  15.41610  -0.00158  -0.04330
   2652.65   5.51849   0.41896  -0.61039

内存错误/泄漏检查

在任何动态分配内存的代码中,对于每一块已分配的内存,您都有两项责任:(1)始终保留一个指向该内存块起始地址的指针,以便(2)在不再需要时能够将其释放。您必须使用内存错误检查程序,以确保没有在已分配内存块的范围之外进行写入,并确认已释放所有已分配的内存。在Linux上,valgrind是通常的选择。误用内存块的方式有很多,而且往往很隐蔽,可能导致严重的问题,因此没有理由不做这项检查。每个平台都有类似的内存检查工具,它们都简单易用,只需通过它运行您的程序即可。

$ valgrind ./bin/fscanf_4col_dyn dat/float_4col.txt
==21392== Memcheck, a memory error detector
==21392== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==21392== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==21392== Command: ./bin/fscanf_4col_dyn dat/float_4col.txt
==21392==

 13 rows of data read:

   2078.62   5.69982  -0.17815  -0.04732
<snip>
   2652.65   5.51849   0.41896  -0.61039

==21392==
==21392== HEAP SUMMARY:
==21392==     in use at exit: 0 bytes in 0 blocks
==21392==   total heap usage: 15 allocs, 15 frees, 1,496 bytes allocated
==21392==
==21392== All heap blocks were freed -- no leaks are possible
==21392==
==21392== For counts of detected and suppressed errors, rerun with: -v
==21392== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)

答案 3 :(得分:0)

您在打印时不需要提供地址,请将其更改为

printf("x = %lf, y = %lf, e1 = %lf, e2 = %lf\n", ellcat[i][0], ellcat[i][1], ellcat[i][2], ellcat[i][3]);