计算文件中单词的出现次数

时间:2018-02-26 19:08:51

标签: c arrays file

如果您阅读文件,我的wordOcurrences不起作用。它们可以在计数中出现两次,因此它不能正确计数,但如果我从标准输入输入则计数正确。 所以我必须在文件(-i input.txt)中读取该文件中的单词和单词出现次数。将结果输出到使用-o output.txt指定的特定文件中。如果有-c,它应该忽略标点符号并转换为小写

主要

#include "count.h"
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
//#include "wordOccurances.h"
#include "wordOccurrences.c"

int main(int argc, char **argv)
{   
        //Initialize variables
        FILE *fi; // input file     
        FILE *fo =stdout; //output file
        char buffer[1000];

        char *input; //for manually entering string with -c
        input = (char*)malloc(100 * sizeof(char));
        char *name = "invalid";

        int wordcount; // the number of words

        char *ch; //a single character
        ch = (char*)malloc(100000 * sizeof(char));
        int  c = 0, i, iFlag = 0, oFlag = 0, cFlag = 0;
        char ptr1[50][100];
        char *ptr;
        if(argc == 1)
        {
            printf("Default settings\n");
        }
        else
        {
            for(i=1; i<argc;i++)
            {
                if(strcmp(argv[i], "-i")==0)
                {
                    printf("input\n");
                    iFlag = 1;
                    fi = fopen(argv[++i],"r");
                }
                if(strcmp(argv[i], "-o")==0)
                {
                    printf("output\n");
                    oFlag = 1;
                    fo = fopen(argv[++i],"w");
                }
                if(strcmp(argv[i], "-c")==0)
                {
                    cFlag = 1;
                }
            }
        }
        if(iFlag ==1)
        {
            wordcount = countForFile(fi, wordcount);
            fprintf(fo,"Word count is: %d \n", wordcount);
            wordOccurencesForFile(fi, cFlag,fo);    
        }
        else
        {
            printf("Enter text: ");
            scanf(" %[^\n]s", input);

                        //Loop through input
                        int i = 0;
                        if(cFlag == 1)
                        {
                            int i =0;
                            for( i = 0;input[i]!='\0'; i++) 
                            {
                            //find upperCase letters
                                if(input[i] >= 'A' && input[i] <= 'Z')
                                {
                                    //overwrite to lowerCase
                                    input[i] = tolower(input[i]);

                                    //input[i] = input[i] +32;

                                }//end of if statement  

                            //ignoring punctuation  
                                if(input[i] == ',' || input[i] == '.' || input[i] == '!' || input[i] == '?' || input[i] == '"' || input[i] == ':' || input[i] ==';' || input[i] == '-')
                                {
                                    input[i] = ' ';
                                }
                            } //end of for loop
                        }
                        wordcount = 0;
                        for(i = 0;input[i] != '\0'; i++)
                        {
                            if(input[i] == ' ' && input[i+1] != ' ')
                            wordcount++;

                        }// end of while loop
                        fprintf(fo,"WordCount is: %d\n", wordcount +1);

                        //count occurrences
                        wordOccurences(input, fo);
        }

        if(oFlag == 1)
        {fclose(fo);}

}

wordOccurrences

 /* 
     * C Program to Find the Frequency of  Every Word in a 
     * given String
     */
    #include <stdio.h>
    #include <string.h>
    #include "functions.h"

    wordOccurences(char *input, FILE *output)
    {
        int count = 0, c = 0, i, j = 0, k, space = 0;
        char str[100], p[50][100], str1[20], ptr1[50][100];
        char *ptr;

       // printf("Enter the string\n");
        //scanf(" %[^\n]s", input);
        printf("string length is %d\n", strlen(input));
        for (i = 0;i<strlen(input);i++)
        {
            if ((input[i] == ' ')||(input[i] == ', ')||(input[i] == '.'))
            {
                space++;
            }
        }
        for (i = 0, j = 0, k = 0;j < strlen(input);j++)
        {
            if ((input[j] == ' ')||(input[j] == 44)||(input[j] == 46))  
            {    
                p[i][k] = '\0';
                i++;
                k = 0;
            }        
            else
                 p[i][k++] = input[j];
        }
        k = 0;
        for (i = 0;i <= space;i++)
        {
            for (j = 0;j <= space;j++)
            {
                if (i == j)
                {
                    strcpy(ptr1[k], p[i]);
                    k++;
                    count++;
                    break;
                }
                else
                {
                    if (strcmp(ptr1[j], p[i]) != 0)
                        continue;
                    else
                        break;
                }
            }
        }
        for (i = 0;i < count;i++) 
        {
            for (j = 0;j <= space;j++)
            {
                if (strcmp(ptr1[i], p[j]) == 0)
                    c++;
            }

            fprintf(output,"%s -> %d times\n", ptr1[i], c);
            c = 0;
        }
    }

    wordOccurencesForFile(FILE *fp, int cFlag, FILE *output)
    {
        fseek(fp, 0, SEEK_END);
        long fsize = ftell(fp);
        fseek(fp, 0, SEEK_SET);

        char *str = (char*)malloc(fsize + 1);
       fread(str, fsize, 1, fp);
       fclose(fp);

       str[fsize] = 0;
       int count = 0, c = 0, i, j = 0, k, space = 0;
       char p[1000][512], str1[512], ptr1[1000][512];
       char *ptr;

        if ( fp )
        {   
            for (i = 0;i<strlen(str);i++)
            {
                if (cFlag == 1)
                {
                    //ignoring punctuation  
                    if(str[i] == ',' || str[i] == '.' || str[i] == '!' 
                    || str[i] == '?' || str[i] == '"' || str[i] == ':' 
                    || str[i] ==';' || str[i] == '-')
                    {
                        str[i] = ' ';
                    }
                }

                if ((str[i] == ' ')||(str[i] == ',')||(str[i] == '.'))
                {
                    space++;
                }
            }

            for (i = 0, j = 0, k = 0;j < strlen(str);j++)
            {
                if ((str[j] == ' ')||(str[j] == 44)||(str[j] == 46))
                {
                    p[i][k] = '\0';
                    i++;
                    k = 0;
                }
                else
                {   
                    if (cFlag == 1)
                    {
                        //find upperCase letters
                        if(str[j] >= 'A' && str[j] <= 'Z')
                        {
                            //overwrite to lowerCase
                            str[j] = tolower(str[j]);

                        }//end of if statement
                    }

                    p[i][k++] = str[j];
                }
            }

           k = 0;
            for (i = 0;i <= space;i++)
            {
                for (j = 0;j <= space;j++)
                {
                    if (i == j)
                    {
                        strcpy(ptr1[k], p[i]);
                        k++;
                        count++;
                        break;
                    }
                    else
                    {
                        if (strcmp(ptr1[j], p[i]) != 0)
                        continue;
                        else
                        break;
                    }
                }
            }

            for (i = 0;i < count;i++)
            {
                for (j = 0;j <= space;j++)
                {
                    if (strcmp(ptr1[i], p[j]) == 0)
                    c++;
                }
                fprintf(output,"%s %d \n", ptr1[i], c);
                c = 0;
            }
        }
         else
        {
            printf("Failed to open the file\n");
        }
    }

0 个答案:

没有答案