检查单词是否可以由较小的给定单词组成的错误代码(分词)

时间:2015-10-03 04:30:22

标签: c algorithm trie

下面这段代码用于检查一个单词是否可以由给定的较小单词组成(分词),但它有错误。这是我为上述问题编写的代码,但在线评测系统判定它不正确。可能的原因是什么?我应该如何修改我的代码?

// Sample input: the leading "Â" is a non-ASCII character.
$str = "Â some text";

// Replace anything outside the \x00-\x7F (ASCII) range with an empty string,
// assign the result back to $str, and print it.
echo $str = preg_replace('/[^\x00-\x7F]/',"",$str);

2 个答案:

答案 0 :(得分:2)

这是一种方法。这段代码可以编译并运行,并会显示解析的结果。它会尝试从当前目录中名为“dictionary.txt”的文件读取字典;你可以修改代码,把字典放在任何位置。我加了大量注释来帮助你理解,但其中有一些微妙的 C 语言细节,你可能需要认真思考才能弄明白。一点建议:给程序中的每样东西起名时都要尽可能准确(同时保持相当简洁)。在调试或排查错误时,这会有很大帮助。随意的命名确实会让代码变得混乱、难以调试。

祝你好运!

示例:

$ gcc -o wordsplitter wordsplitter.c

$ wordsplitter xyzhellogoodbyefoodogcatpigcarwhereareyouhorse

xyz "hello" "goodbye" foo "dog" "cat" pigcar "where" "are" "you" horse

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/* Path of the dictionary file, resolved relative to the current directory. */
#define DICTIONARY_FILEPATH "dictionary.txt"

/* Size in bytes of the buffer that holds one line read from the dictionary. */
#define MAX_WORD_SIZE 100

/*
 * Status codes returned by the dictionary loader.
 * (Normally these would live in a shared header.)
 * Failures are negative so callers can test `status < 0`.
 */
#define SUCCESS        0
#define FILE_NOT_FOUND (-1)
#define OUT_OF_MEMORY  (-2)

/* One dictionary entry; entries are chained into a singly linked list. */
struct word {
    struct word *next;  /* following entry, or NULL at the end of the list */
    char *word;         /* NUL-terminated text of the dictionary word */
};

typedef struct word word_t;

/* Head of the global dictionary word list; NULL until a dictionary has been loaded. */
word_t *dictionaryListhead = NULL;

/*
 * One piece of a parsed word: either a dictionary word or a run of
 * characters that matched no dictionary word. Pieces form a singly
 * linked list in left-to-right order.
 */
struct wordsubcomponent {
    struct wordsubcomponent *next;  /* following piece, or NULL */
    char *text;                     /* NUL-terminated text of this piece */
    int isDictionaryWord;           /* non-zero when text is a dictionary word */
};

typedef struct wordsubcomponent wordsubcomponent_t;

/*
 * Reads a dictionary file containing one word per line and appends every
 * word to the linked list rooted at *listhead.
 *
 *   filename - path of the dictionary file to read
 *   listhead - address of the list head; may point at NULL (empty list) or
 *              at an existing list, in which case new words are appended
 *
 * Returns SUCCESS, FILE_NOT_FOUND if the file cannot be opened, or
 * OUT_OF_MEMORY if an allocation fails. On OUT_OF_MEMORY the words stored
 * so far remain in the list. The file is closed on every path.
 *
 * Blank lines are skipped. Lines longer than MAX_WORD_SIZE - 1 characters
 * are split by fgets() and stored as several separate words.
 */
int
loadDictionaryFromFile(char *filename, word_t **listhead)
{
    char wordFromFile[MAX_WORD_SIZE];
    int status = SUCCESS;

    FILE *dictionaryFile = fopen(filename, "r");
    if (dictionaryFile == NULL) {
        return FILE_NOT_FOUND;
    }

    /* Locate the current tail so that a non-empty list is appended to. */
    word_t *lastWordStored = *listhead;
    while (lastWordStored != NULL && lastWordStored->next != NULL) {
        lastWordStored = lastWordStored->next;
    }

    while (fgets(wordFromFile, sizeof(wordFromFile), dictionaryFile)) {
        /* Cut the line at the first CR or LF so "\n" and "\r\n" both work. */
        size_t wordLength = strcspn(wordFromFile, "\r\n");
        wordFromFile[wordLength] = '\0';
        if (wordLength == 0) {
            continue; /* an empty word would match at every position */
        }

        word_t *newDictionaryWordNode = calloc(1, sizeof *newDictionaryWordNode);
        if (newDictionaryWordNode == NULL) {
            status = OUT_OF_MEMORY;
            break;
        }
        newDictionaryWordNode->word = malloc(wordLength + 1);
        if (newDictionaryWordNode->word == NULL) {
            free(newDictionaryWordNode); /* do not leak the half-built node */
            status = OUT_OF_MEMORY;
            break;
        }
        memcpy(newDictionaryWordNode->word, wordFromFile, wordLength + 1);

        if (lastWordStored == NULL) {
            *listhead = newDictionaryWordNode;
        } else {
            lastWordStored->next = newDictionaryWordNode;
        }
        lastWordStored = newDictionaryWordNode;
    }

    fclose(dictionaryFile);
    return status;
}

/*
 * Allocates a zeroed subcomponent whose text is a heap-allocated empty
 * string (one byte, just the terminator), so text is never NULL.
 * The text buffer must be enlarged before anything is appended to it.
 *
 * Never returns NULL: if either allocation fails, a message is written to
 * stderr and the program exits.
 */
wordsubcomponent_t
*newsubcomponent(void) {
    wordsubcomponent_t *subcomp = calloc(1, sizeof *subcomp);
    /* calloc(1, 1) yields a single zero byte, i.e. the empty string "". */
    char *emptyText = (subcomp != NULL) ? calloc(1, 1) : NULL;

    if (subcomp == NULL || emptyText == NULL) {
        fprintf(stderr, "out of memory (fatal). program exiting\n");
        exit(-1);
    }
    subcomp->text = emptyText;
    return subcomp;
}

/*
 * Returns a linked list of word subcomponents for the given word, split up
 * around dictionary words.
 *
 *   wordToParse - NUL-terminated text to split (NULL is treated as empty)
 *   listhead    - head of the dictionary word list (may be NULL)
 *
 * At each position the dictionary is scanned in list order and the first
 * word that is a case-insensitive prefix of the remaining text is taken;
 * there is no backtracking and no longest-match preference. Characters that
 * start no dictionary word are collected into non-dictionary subcomponents.
 * Empty dictionary words are ignored.
 *
 * The returned list always ends with a subcomponent whose text is empty.
 * Every text buffer in the list is heap-allocated and independent of the
 * dictionary storage. On allocation failure a message is written to stderr
 * and the program exits.
 */
wordsubcomponent_t *getWordSubcomponents(char *wordToParse, word_t *listhead) {
    wordsubcomponent_t *subcomponents, *currSubcomp;
    subcomponents = currSubcomp = newsubcomponent();
    if (wordToParse == NULL)
        return subcomponents;

    size_t pendingLen = 0; // length of the non-dict text collected in currSubcomp->text
    const char *end = wordToParse + strlen(wordToParse); // computed once, not per iteration

    for (char *cp = wordToParse; cp < end;) {
        word_t *match = NULL;
        size_t matchLen = 0;

        for (word_t *wordNode = listhead; wordNode != NULL; wordNode = wordNode->next) {
            if (wordNode->word == NULL)
                continue;
            size_t len = strlen(wordNode->word);
            // A zero-length word would match everywhere without advancing cp.
            if (len != 0 && strncasecmp(cp, wordNode->word, len) == 0) {
                match = wordNode;
                matchLen = len;
                break;
            }
        }

        if (match != NULL) {
            if (pendingLen != 0) { // non-dict text is pending: keep it and start a new node
                wordsubcomponent_t *fresh = newsubcomponent();
                currSubcomp->next = fresh;
                currSubcomp = fresh;
                pendingLen = 0;
            }

            // Store a private copy so the list does not alias dictionary memory.
            char *copy = malloc(matchLen + 1);
            if (copy == NULL) {
                fprintf(stderr, "out of memory (fatal). program exiting\n");
                exit(-1);
            }
            memcpy(copy, match->word, matchLen + 1);
            free(currSubcomp->text); // release the empty seed string
            currSubcomp->text = copy;
            currSubcomp->isDictionaryWord = 1;

            wordsubcomponent_t *fresh = newsubcomponent(); // node for whatever follows
            currSubcomp->next = fresh;
            currSubcomp = fresh;
            cp += matchLen; // advance past the extracted dict-word
        } else {
            // Grow the buffer by one character plus the terminator before writing.
            char *grown = realloc(currSubcomp->text, pendingLen + 2);
            if (grown == NULL) {
                fprintf(stderr, "out of memory (fatal). program exiting\n");
                exit(-1);
            }
            grown[pendingLen++] = *cp++;
            grown[pendingLen] = '\0';
            currSubcomp->text = grown;
        }
    }
    return subcomponents;
}

/*
 * Prints every word of the dictionary list to stdout, one per line,
 * framed by a title line and two separator lines.
 */
void
dumpDictionary(word_t *listhead) {
    const word_t *entry = listhead;

    printf("\nList of dictionary words:\n");
    printf("----------------\n");
    while (entry != NULL) {
        printf("   %s\n", entry->word);
        entry = entry->next;
    }
    printf("----------------\n\n");
}

int 
main(int argc, char **argv) 
{
    int status;
    if ((status = loadDictionaryFromFile(DICTIONARY_FILEPATH, &dictionaryListhead)) < 0) {
        switch(status) {
        case FILE_NOT_FOUND:
            fprintf(stderr, "Error accessing dictionary: %s\n", argv[0]);
            break;
        case OUT_OF_MEMORY:
            fprintf(stderr, "Out of memory");
            break;
        }
        return EXIT_FAILURE;
    }

    /*
     * Load dictionary first so we can show them the list of words if they didn't
     * pass in a command line argument with the word to parse.
     */    
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <word_to_parse>\n\n", argv[0]);
        dumpDictionary(dictionaryListhead);
        return EXIT_FAILURE;
    }

    wordsubcomponent_t *subcomp = getWordSubcomponents(argv[1], dictionaryListhead);
    while(subcomp != NULL && strlen(subcomp->text) > 0) {
        if (subcomp->isDictionaryWord) 
            printf("\"%s\" ", subcomp->text);
        else
            printf("%s ", subcomp->text);
        subcomp = subcomp->next;
    }
    printf("\n");
    return EXIT_SUCCESS;
}

答案 1 :(得分:1)

@nerdist colony: loadDictionaryFromFile中存在资源泄漏。这意味着在发生错误时从此函数返回时文件指针未关闭。

以下是该函数的更正版本:

/*
 * Reads a dictionary file containing one word per line and appends every
 * word to the linked list rooted at *listhead.
 *
 *   filename - path of the dictionary file to read
 *   listhead - address of the list head; may point at NULL (empty list) or
 *              at an existing list, in which case new words are appended
 *
 * Returns SUCCESS, FILE_NOT_FOUND if the file cannot be opened, or
 * OUT_OF_MEMORY if an allocation fails. On OUT_OF_MEMORY the words stored
 * so far remain in the list. The single exit after the loop guarantees the
 * file pointer is closed on every path, including errors.
 *
 * Blank lines are skipped. Lines longer than MAX_WORD_SIZE - 1 characters
 * are split by fgets() and stored as several separate words.
 */
int loadDictionaryFromFile(char *filename, word_t **listhead)
{
    char wordFromFile[MAX_WORD_SIZE];
    int status = SUCCESS;

    FILE *dictionaryFile = fopen(filename, "r");
    if (dictionaryFile == NULL) {
        return FILE_NOT_FOUND;
    }

    /* Locate the current tail so that a non-empty list is appended to. */
    word_t *lastWordStored = *listhead;
    while (lastWordStored != NULL && lastWordStored->next != NULL) {
        lastWordStored = lastWordStored->next;
    }

    while (fgets(wordFromFile, sizeof(wordFromFile), dictionaryFile)) {
        /* Cut the line at the first CR or LF so "\n" and "\r\n" both work. */
        size_t wordLength = strcspn(wordFromFile, "\r\n");
        wordFromFile[wordLength] = '\0';
        if (wordLength == 0) {
            continue; /* an empty word would match at every position */
        }

        word_t *newDictionaryWordNode = calloc(1, sizeof *newDictionaryWordNode);
        if (newDictionaryWordNode == NULL) {
            status = OUT_OF_MEMORY;
            break; /* falls through to fclose() below */
        }
        newDictionaryWordNode->word = malloc(wordLength + 1);
        if (newDictionaryWordNode->word == NULL) {
            free(newDictionaryWordNode); /* do not leak the half-built node */
            status = OUT_OF_MEMORY;
            break;
        }
        memcpy(newDictionaryWordNode->word, wordFromFile, wordLength + 1);

        if (lastWordStored == NULL) {
            *listhead = newDictionaryWordNode;
        } else {
            lastWordStored->next = newDictionaryWordNode;
        }
        lastWordStored = newDictionaryWordNode;
    }

    fclose(dictionaryFile);
    return status;
}