如何从C中的文件正确读取某些字符串?

时间:2019-03-02 23:55:42

标签: c string file text-files readfile

我试图用c编写一个比较字符串的程序。字符串是成对给出的,在文件的顶部有成对的数目。 该文件的格式如下:

2
a: 01010100000101011111
   01001010100000001111
   00000000000011110000
b: 00000111110000010001
   10101010100111110001
a: 00000011111111111100
   00111111111111000
b: 00000001111001010101

我的问题是如何正确读取这些字符串,以便进行比较等操作。

这是我的代码:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

#define NCHAR 32

// Asker's driver (excerpt): for each of `pairs` pairs, read whitespace-delimited
// tokens with readWord(), join the tokens of the "a:" record into aline and those
// of the "b:" record into bline with concat(), and write both lines to fpw.
// NOTE(review): pairs, aline, bline, amaxsize, bmaxsize, fpw and concat() are not
// declared anywhere in this excerpt -- presumably defined in code that was omitted.
int main (int argc, char **argv) {
    char *word1 = NULL;
    // NOTE(review): fp is still NULL when it is first handed to readWord() below;
    // the fopen() call (and the read of the pair count) is presumably omitted.
    FILE *fp = NULL;
    for (int i = 0; i<pairs; i++){

        // Only the first pair still has its leading "a:" label in the stream;
        // for later pairs the label is consumed by the b-line loop further down.
        if (i == 0)
        {
            word1 = readWord(fp, &word1);//read a:
            // readWord() yields "" when the first character it sees is a delimiter,
            // so empty tokens are skipped until a real one arrives.
            // NOTE(review): readWord() returns NULL when it hits EOF before reading
            // any character; that NULL would be passed straight to strcmp() here and
            // in every identical skip loop below.
            while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);
        }

        // NOTE(review): every readWord() call stores a freshly malloc'd buffer
        // through &word1; the previous buffer is not freed anywhere in this excerpt.
        word1 = readWord(fp, &word1);//read string
        while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);

        // Start the a-line with the first token.
        // NOTE(review): the malloc result is not checked, and strncpy leaves aline
        // without a terminator when word1 is amaxsize characters or longer.
        aline = malloc(amaxsize);
        strncpy(aline, word1, amaxsize);

        word1 = readWord(fp, &word1); 
        while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);

        // Append continuation tokens until the "b:" label is reached.
        while (strcmp(word1, "b:")!=0){
            aline = concat(aline, word1);

            word1 = readWord(fp, &word1); 
            while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);
        }

        fprintf(fpw, "a: %s\n", aline); //write to the file..
        // Releases only the most recent token (the "b:" label at this point).
        free (word1);
        word1 = NULL;

        word1 = readWord(fp, &word1); //read string after b:
        while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);
        // Start the b-line with the first token (same caveats as for aline above).
        bline = malloc(bmaxsize);
        strncpy(bline, word1, bmaxsize);

        word1 = readWord(fp, &word1); 
        while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);

        if (i == (pairs-1))
        {

            // Last pair: no "a:" label follows, so tokens are appended until
            // readWord() hands back an empty string.
            // NOTE(review): a NULL return at EOF is not handled here either.
            while (strcmp(word1, "")!=0){
                bline = concat(bline, word1);
                word1 = readWord(fp, &word1);

            }
        }
        else 
        {
            // Not the last pair: append tokens until the next pair's "a:" label,
            // which is consumed here (hence the i == 0 special case at the top).
            while (strcmp(word1, "a:")!=0){
                bline = concat(bline, word1);
                word1 = readWord(fp, &word1);
                while(strcmp(word1, "") == 0) word1 = readWord(fp, &word1);
            }
        }
        fprintf(fpw, "b: %s\n", bline); //write to the file..
        free (word1);
        word1 = NULL;

        // Blank line between pairs in the output file.
        fprintf(fpw,"\n");
// NOTE(review): by brace count this closes the for loop; main's own closing
// brace does not appear in this excerpt.
}

    /*
     * Read one token from fp into a newly allocated buffer.
     *
     * Characters are consumed up to (and including) the first newline, tab,
     * space or EOF; the delimiter itself is not stored.  The buffer starts at
     * NCHAR bytes and doubles whenever it is about to run out of room.
     *
     * fp      stream to read from
     * buffer  out-parameter; receives the same pointer that is returned
     *         (any value it held before the call is overwritten)
     *
     * Returns the nul-terminated token.  The token is the empty string when
     * the very first character read is a delimiter.  Returns NULL (and sets
     * *buffer to NULL) when EOF is reached before any character was stored,
     * or NULL after printing a message when the initial allocation fails.
     * If growing the buffer fails, the characters gathered so far are
     * returned as a terminated string.
     */
    char *readWord(FILE *fp, char **buffer)
    {
        int capacity = NCHAR;   /* bytes currently allocated */
        int len = 0;            /* characters stored so far */
        int c;

        *buffer = malloc (capacity);
        if (*buffer == NULL) {
            fprintf (stderr, "Error...\n");
            return NULL;
        }

        for (;;) {
            c = fgetc(fp);
            if (c == EOF || c == '\n' || c == '\t' || c == ' ')
                break;                      /* delimiter ends the token */

            (*buffer)[len++] = c;

            if (len + 1 < capacity)
                continue;                   /* still room for the terminator */

            /* Out of room: double the buffer, keeping the old one on failure. */
            char *grown = realloc (*buffer, capacity * 2);
            if (grown == NULL) {
                (*buffer)[len] = 0;
                return *buffer;
            }
            *buffer = grown;
            capacity *= 2;
        }

        (*buffer)[len] = 0;                 /* terminate the token */

        /* Nothing stored and the stream is exhausted: report that with NULL. */
        if (len == 0 && c == EOF) {
            free (*buffer);
            *buffer = NULL;
        }
        return *buffer;
    }

readWord函数每次读取一个单词。我想做的是逐词读取文件,并将这些词连接起来,得到完整的字符串a并保存为一行,以便对其进行处理;b也是如此。问题是文件没有被正确读取,例如,第一对字符串我没有得到全部内容,而是只得到了第一部分。有什么想法吗?

1 个答案:

答案 0 :(得分:0)

您尝试从文件中进行的读取并不简单,但可以这样处理:设置一个标志来记录是否已经看到过'a'或'b',跳过所有空白字符和':'字符,将所有其他字符存储在缓冲区中,并根据需要重新分配;然后在遇到第二个'a'或'b'时,用ungetc将该字符放回FILE*流中,给缓冲区加上nul终止符并返回缓冲区。

听起来很容易,对吗?嗯,差不多就是这样。让我们看看您的readword()函数需要做些什么。

首先,由于您是在readword()内部为buffer分配内存,因此无需将char **buffer作为参数传递。您已经将readword声明为char *readword(...),因此只需将FILE*指针作为参数传递,然后返回一个指向已分配、已填充且以nul终止的缓冲区的指针。

您可以按照自己喜欢的任何方式处理重新分配方案,可以从分配合理数量的字符开始,然后将当前大小增加一倍(或增加一些倍数),或者每次用尽时都添加固定数量。下面的示例仅从32个字符的缓冲区开始,然后在每次需要重新分配时添加另一个32个字符。 (如果数据大小确实未知,我可能会以32个字符开头,然后每次用完时都会翻倍-完全由您决定)。

使用ctype.h中的isspace()函数可确保正确处理所有空白字符。

最后几个问题只是确保您返回的buffer是一个以nul终止的字符串,并确保每次调用realloc之后,都将指针重新指向新内存块中缓冲区的末尾。

综合起来,您可以执行类似下面的操作。在readword()函数之后添加了一个简单的示例程序,用于读取您的示例文件并输出从该文件读取并拼接后的字符串:

#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

#define NCHR 32

char *readword (FILE *fp)
{
    int c,                      /* current character */
        firstline = 0;          /* flag for 'a' or 'b' found at 1st char */
    size_t n = 0, nchr = NCHR;  /* chars read, number of chars allocated */
    char *buffer = NULL, *p;    /* buffer to fill, pointer to buffer */

    buffer = malloc (nchr);             /* allocate initial NCHR */
    if (!buffer) {                      /* validate */
        perror ("malloc-buffer");
        return NULL;
    }
    p = buffer;                         /* set pointer to buffer */

    while ((c = fgetc (fp)) != EOF) {   /* read each char */
        if (isspace (c) || c == ':')    /* skip all whitespace and ':' */
            continue;
        if (c == 'a' || c == 'b') {     /* begins with 'a' or 'b' */
            if (firstline) {            /* already had a/b line */
                ungetc (c, fp);         /* put the char back */
                *p = 0;                 /* nul-terminate */
                return buffer;          /* return filled buffer */
            }
            firstline = 1;              /* set firstline flag */
            continue;
        }
        else {
            if (n == nchr - 2) {        /* check if realloc needed */
                void *tmp = realloc (buffer, nchr + NCHR);
                if (!tmp)               /* validate */
                    exit (EXIT_FAILURE);
                buffer = tmp;           /* assign new block to buffer */
                p = buffer + n;         /* set p at buffer end */
                nchr += NCHR;           /* update no. chars allocated */
            }
            *p++ = c;       /* assign the current char and advance p */
            n++;            /* increment your character count */
        }
    }
    *p = 0;     /* nul-terminate */

    return buffer;
}

int main (int argc, char **argv) {

    char buf[NCHR], *word;
    int nwords, toggle = 0;
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    if (!fgets (buf, NCHR, fp)) {
        fputs ("error: read of line 1 failed.\n", stderr);
        return 1;
    }
    if (sscanf (buf, "%d", &nwords) != 1) {
        fputs ("error: invalid file format.\n", stderr);
        return 1;
    }
    nwords *= 2;   /* actual number of words is twice the number of pairs */

    while (nwords-- && (word = readword (fp))) {
        printf ("%c: %s\n", toggle ? 'b' : 'a', word);
        free (word);
        if (toggle) {
            putchar ('\n');
            toggle = 0;
        }
        else
            toggle = 1;
    }

    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    return 0;
}

(注意:上面的toggle只是一个取值为1或0的标志,用于在相应行的开头输出"a:"或"b:",并在所读取的每对行之间添加一个'\n'。)

使用/输出示例

"b:"

内存使用/错误检查

在动态分配存储空间时始终验证您的内存使用情况,并确保已释放所有分配的内存。

'\n'

仔细研究一下,如果您有任何疑问,请告诉我。问题的最大部分是处理每一对所有行的读取和连接。其余的编码留给您。