Question

这个小程序的目的是比较两个单独文件（file1.csv和file2.csv）中的字符串，并将其结果提供给file3.csv。如果在file1.csv中找到file2.csv中的字符串，程序会将file1.csv中的字符串复制到file3.csv。

file1.csv的内容（包含6列）：

10000001    text1   text1   text1   text1   text1
10000002    text2   text2   text2   text2   text2
10000003    text3   text3   text3   text3   text3
10000004    text4   text4   text4   text4   text4
10000005    text5   text5   text5   text5   text5
10000006    text6   text6   text6   text6   text6
10000007    text7   text7   text7   text7   text7
10000008    text8   text8   text8   text8   text8
10000009    text9   text9   text9   text9   text9
10000010    text10  text10  text10  text10  text10
10000011    text11  text11  text11  text11  text11
10000012    text12  text12  text12  text12  text12
10000013    text13  text13  text13  text13  text13
10000014    text14  text14  text14  text14  text14
10000015    text15  text15  text15  text15  text15
10000016    text16  text16  text16  text16  text16
10000017    text17  text17  text17  text17  text17
10000018    text18  text18  text18  text18  text18
10000019    text19  text19  text19  text19  text19
10000020    text20  text20  text20  text20  text20
10000021    text21  text21  text21  text21  text21
10000022    text22  text22  text22  text22  text22
10000023    text23  text23  text23  text23  text23
10000024    text24  text24  text24  text24  text24
10000025    text25  text25  text25  text25  text25

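file2.csv的内容（只有一列；原文此处的数据缺失，以下内容系根据下面的预期结果推断）：

10000001
10000003
10000004
10000006
10000007
10000008
10000009
10000011
10000012
10000015
10000025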

预期结果将是：

10000001    text1   text1   text1   text1   text1
10000003    text3   text3   text3   text3   text3
10000004    text4   text4   text4   text4   text4
10000006    text6   text6   text6   text6   text6
10000007    text7   text7   text7   text7   text7
10000008    text8   text8   text8   text8   text8
10000009    text9   text9   text9   text9   text9
10000011    text11  text11  text11  text11  text11
10000012    text12  text12  text12  text12  text12
10000015    text15  text15  text15  text15  text15
10000025    text25  text25  text25  text25  text25

我使用strstr（）函数来比较字符串，但它不起作用，代码列在下面：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Original attempt from the question: intended to copy every line of
 * file1.csv that contains a string listed in file2.csv into file3.csv.
 * As written it does not produce that result; the NOTE comments below
 * point out what the statements actually do.
 */
int main()
{
    char line[150][80] = {0};  /* rows for file1.csv lines, zero-initialised */
    char line2[150][80] = {0};  /* rows for file2.csv lines, zero-initialised */

    int i = 0, b = 0;

    /* NOTE: none of the three fopen() results is checked for NULL. */
    FILE *file1 = fopen("file1.csv", "r");
    FILE *file2 = fopen("file2.csv", "r");
    FILE *file3 = fopen("file3.csv", "w");

    /* Read one line of file1.csv (at most 79 characters) into line[i]. */
    while (fscanf(file1, "%79[^\n]\n", line[i]) != EOF) {
        /* NOTE: i is advanced before the line is used, so line[i] below is
         * the next row, which has not been filled yet. */
        i++;
        /* Reads file2.csv until fscanf() reports EOF. The stream is never
         * rewound, so on later outer iterations this loop finds the file
         * already at its end. */
        while (fscanf(file2, "%79[^\n]\n", line2[b]) != EOF) {
            b++;
        }
        /* NOTE: b now indexes the row after the last one that was read, so
         * neither argument of strstr() is a row filled by fscanf(). */
        if (strstr(line[i],line2[b]))
        fprintf(file3, "%s\n", line[i]);
     }

    fclose(file1);
    fclose(file2);
    fclose(file3);
    return 0;
}

提前致谢！

Answer 1

正如 unwind 和其他评论者正确指出的那样，在读取file1.csv的每次迭代中都重新读取file2.csv是没有意义的。只需将file2.csv读取一次，然后循环遍历line2[]数组即可。

其次，您可以逐行处理file1.csv并在处理后立即忘记当前行的内容，因此也无需使用line[][]数组。

这是您修改后的代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Return 1 from the calling function when an fopen() result is NULL.
 *
 *   file - the FILE * returned by fopen()
 *   name - the path, printed in the failure message
 *
 * The expansion contains `return 1;`, so the macro may only be used inside
 * a function returning int (such as main). It is wrapped in
 * do { ... } while (0) so that `CHECK_FILE_OPEN(f, n);` is exactly one
 * statement and stays correct inside an unbraced if/else.
 */
#define CHECK_FILE_OPEN(file, name) \
    do { \
        if ((file) == NULL) { \
            printf("Failed to open %s\n", (name)); \
            return 1; \
        } \
    } while (0)

/*
 * Read the next non-blank line from fp into buf (capacity size) and strip
 * its trailing line ending ("\n" or "\r\n").
 *
 * Returns 1 when a line was stored in buf, 0 at end-of-file or on a read
 * error. A line longer than size - 1 characters is returned in pieces.
 */
static int read_line(FILE *fp, char *buf, size_t size)
{
    while (fgets(buf, (int)size, fp) != NULL) {
        buf[strcspn(buf, "\r\n")] = '\0';
        if (buf[0] != '\0') {
            return 1;
        }
    }
    return 0;
}

/*
 * Copy every line of file1.csv that contains at least one of the strings
 * listed in file2.csv (one per line) to file3.csv.
 *
 * Returns 0 on success, 1 when one of the three files cannot be opened.
 */
int main(void)
{
    enum { MAX_FILTERS = 150, FILTER_LEN = 80 };

    char line[150] = "";
    char line2[MAX_FILTERS][FILTER_LEN] = {{0}};
    int filterCount = 0;

    FILE *file1 = fopen("file1.csv", "r");
    CHECK_FILE_OPEN(file1, "file1.csv");
    FILE *file2 = fopen("file2.csv", "r");
    CHECK_FILE_OPEN(file2, "file2.csv");
    FILE *file3 = fopen("file3.csv", "w");
    CHECK_FILE_OPEN(file3, "file3.csv");

    /* Load the filter strings once. The count is capped at the array size,
     * and blank lines are skipped because an empty needle would make
     * strstr() match every line. */
    while (filterCount < MAX_FILTERS &&
           read_line(file2, line2[filterCount], sizeof line2[filterCount])) {
        filterCount++;
    }

    /* Stream file1.csv line by line; each line is written at most once. */
    while (read_line(file1, line, sizeof line)) {
        for (int b = 0; b < filterCount; b++) {
            if (strstr(line, line2[b])) {
                fprintf(file3, "%s\n", line);
                break;
            }
        }
    }

    fclose(file1);
    fclose(file2);
    fclose(file3);
    return 0;
}

但是，您的输入数据似乎足够具体，可以进行一些优化假设。特别是，（a）所有文件按第一列值排序，（b）第二列只是第一列的简化副本（就行索引值而言）。

在这种情况下，您可能希望实现一次性（单遍）处理：逐行扫描两个文件并比较索引值。每次迭代都前进到第一个文件的下一行；而对于第二个文件，只有当它的当前行已在第一个文件中找到时，才前进到下一行。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Return 1 from the calling function when an fopen() result is NULL.
 *
 *   file - the FILE * returned by fopen()
 *   name - the path, printed in the failure message
 *
 * The expansion contains `return 1;`, so the macro may only be used inside
 * a function returning int (such as main). It is wrapped in
 * do { ... } while (0) so that `CHECK_FILE_OPEN(f, n);` is exactly one
 * statement and stays correct inside an unbraced if/else.
 */
#define CHECK_FILE_OPEN(file, name) \
    do { \
        if ((file) == NULL) { \
            printf("Failed to open %s\n", (name)); \
            return 1; \
        } \
    } while (0)

/*
 * Read the next non-blank line from fp into buf (capacity size) and strip
 * its trailing line ending ("\n" or "\r\n").
 *
 * Returns 1 when a line was stored in buf, 0 at end-of-file or on a read
 * error. A line longer than size - 1 characters is returned in pieces.
 */
static int read_line(FILE *fp, char *buf, size_t size)
{
    while (fgets(buf, (int)size, fp) != NULL) {
        buf[strcspn(buf, "\r\n")] = '\0';
        if (buf[0] != '\0') {
            return 1;
        }
    }
    return 0;
}

/*
 * Single-pass variant: walk file1.csv and file2.csv in step and copy each
 * line of file1.csv that contains the current file2.csv string to
 * file3.csv.
 *
 * This relies on both files being sorted by the same key and on every
 * string in file2.csv occurring in file1.csv; a file2.csv entry with no
 * match makes the scan run to the end of file1.csv without matching any
 * later entry.
 *
 * Returns 0 on success, 1 when one of the three files cannot be opened.
 */
int main(void)
{
    char line1[150] = "";
    char line2[150] = "";

    FILE *file1 = fopen("file1.csv", "r");
    CHECK_FILE_OPEN(file1, "file1.csv");
    FILE *file2 = fopen("file2.csv", "r");
    CHECK_FILE_OPEN(file2, "file2.csv");
    FILE *file3 = fopen("file3.csv", "w");
    CHECK_FILE_OPEN(file3, "file3.csv");

    /* Plain int flags: this program does not include <stdbool.h>. */
    int have1 = read_line(file1, line1, sizeof line1);
    int have2 = read_line(file2, line2, sizeof line2);

    while (have1 && have2) {
        if (strstr(line1, line2)) {
            fprintf(file3, "%s\n", line1);
            /* Advance file2.csv only after its current entry was found. */
            have2 = read_line(file2, line2, sizeof line2);
        }
        /* file1.csv advances on every iteration. */
        have1 = read_line(file1, line1, sizeof line1);
    }

    fclose(file1);
    fclose(file2);
    fclose(file3);
    return 0;
}

Answer 2

编辑：正如Peter Miehle所指出的，下面的做法只适用于这种情况：file2中的每一行都能在file1中匹配到，并且只匹配一次；同时两个文件都已排序。

如果您不想倒回（rewind）file2.csv，则需要在主循环中读取它，并在子循环中读取file1.csv。每读取一行新内容，都必须在子循环中执行一次子串搜索。下面是一段似乎可行的代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Read the next non-blank line from fp into buf (capacity size) and strip
 * its trailing line ending ("\n" or "\r\n").
 *
 * Returns 1 when a line was stored in buf, 0 at end-of-file or on a read
 * error. A line longer than size - 1 characters is returned in pieces.
 */
static int read_line(FILE *fp, char *buf, size_t size)
{
    while (fgets(buf, (int)size, fp) != NULL) {
        buf[strcspn(buf, "\r\n")] = '\0';
        if (buf[0] != '\0') {
            return 1;
        }
    }
    return 0;
}

/*
 * For each string in file2.csv, scan forward through file1.csv (without
 * rewinding) until a line containing it is found, and copy that line to
 * file3.csv.
 *
 * Because file1.csv is never rewound, this relies on both files being
 * sorted the same way and on every file2.csv entry occurring in file1.csv.
 *
 * Returns 0 on success, 1 when one of the three files cannot be opened.
 */
int main(void)
{
    /* Only the current line of each file is needed, so one buffer per file
     * replaces the 150-row arrays and their unchecked indices. */
    char line[80] = "";
    char line2[80] = "";

    FILE *file1 = fopen("file1.csv", "r");
    FILE *file2 = fopen("file2.csv", "r");
    FILE *file3 = fopen("file3.csv", "w");

    if (file1 == NULL || file2 == NULL || file3 == NULL) {
        fprintf(stderr, "Failed to open file1.csv, file2.csv or file3.csv\n");
        if (file1 != NULL) {
            fclose(file1);
        }
        if (file2 != NULL) {
            fclose(file2);
        }
        if (file3 != NULL) {
            fclose(file3);
        }
        return 1;
    }

    while (read_line(file2, line2, sizeof line2)) {
        /* Continue in file1.csv from where the previous search stopped. */
        while (read_line(file1, line, sizeof line)) {
            if (strstr(line, line2)) {
                printf("\nFound: %s", line2);
                fprintf(file3, "%s\n", line);
                break;
            }
        }
    }

    fclose(file1);
    fclose(file2);
    fclose(file3);
    return 0;
}

Answer 3

非常感谢所有为解答和评论我的问题而贡献时间和精力的人。根据你的答案和建议，我刚刚想出来了，这是代码：

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Count the lines in the text file named fn.
 *
 * Returns the number of lines, or 0 when the file cannot be opened. A
 * final line without a trailing newline is counted; a trailing newline
 * does not add an extra line.
 */
int numberOfLines(const char *fn)
{
    FILE *file = fopen(fn, "r");
    char line[1024] = "";
    int no = 0;
    int at_line_start = 1;

    if (!file) {
        return 0;
    }
    /* Loop on the fgets() result rather than feof(): feof() only becomes
     * true after a read has already failed, which over-counts by one. */
    while (fgets(line, sizeof line, file) != NULL) {
        if (at_line_start) {
            no++;
        }
        /* A chunk without '\n' means the same long line continues. */
        at_line_start = strchr(line, '\n') != NULL;
    }
    fclose(file);
    return no;
}

/*
 * Read the next non-blank line from fp into buf (capacity size) and strip
 * its trailing line ending ("\n" or "\r\n").
 *
 * Returns 1 when a line was stored in buf, 0 at end-of-file or on a read
 * error. A line longer than size - 1 characters is returned in pieces.
 */
static int read_line(FILE *fp, char *buf, size_t size)
{
    while (fgets(buf, (int)size, fp) != NULL) {
        buf[strcspn(buf, "\r\n")] = '\0';
        if (buf[0] != '\0') {
            return 1;
        }
    }
    return 0;
}

/*
 * Copy every line of file1.csv that contains at least one of the strings
 * listed in file2.csv (one per line) to file3.csv.
 *
 * Returns 0 on success, 1 when a file cannot be opened or file2.csv has
 * more lines than the filter table can hold.
 */
int main(void)
{
    enum { MAX_LINES = 150, LINE_LEN = 80 };

    char line[LINE_LEN] = "";
    char line2[MAX_LINES][LINE_LEN] = {{0}};
    char fileOne[100] = "file1.csv";
    char fileTwo[100] = "file2.csv";
    int filterCount = 0;
    int j;

    /* Refuse input that would not fit instead of writing past line2. */
    int nolFiletwo = numberOfLines(fileTwo);
    if (nolFiletwo > MAX_LINES) {
        fprintf(stderr, "%s has %d lines; at most %d are supported\n",
                fileTwo, nolFiletwo, MAX_LINES);
        return 1;
    }

    FILE *file1 = fopen(fileOne, "r");
    FILE *file2 = fopen(fileTwo, "r");
    FILE *file3 = fopen("file3.csv", "w");

    if (file1 == NULL || file2 == NULL || file3 == NULL) {
        fprintf(stderr, "Failed to open file1.csv, file2.csv or file3.csv\n");
        if (file1 != NULL) {
            fclose(file1);
        }
        if (file2 != NULL) {
            fclose(file2);
        }
        if (file3 != NULL) {
            fclose(file3);
        }
        return 1;
    }

    /* Load file2.csv once, so it does not have to be re-read or rewound
     * for every line of file1.csv. */
    while (filterCount < MAX_LINES &&
           read_line(file2, line2[filterCount], sizeof line2[filterCount])) {
        filterCount++;
    }

    while (read_line(file1, line, sizeof line)) {
        for (j = 0; j < filterCount; j++) {
            if (strstr(line, line2[j])) {
                fprintf(file3, "%s\n", line);
                break; /* write each matching line only once */
            }
        }
    }

    fclose(file1);
    fclose(file2);
    fclose(file3);
    return 0;
}

如何使用strstr（）函数比较C中字符数组中的内容？

3 个答案: