Question

我正在尝试使用标头 tokenizer.h 文件访问 tokenizer.c 中的方法，但是当我输入某些内容时，我无法在没有收到 Segmentation fault(core dump) 错误的情况下调用主方法中的“tokenize”方法到控制台。我试过重新排列指针和内存分配，我也得到了同学的帮助。我仍然收到错误。

#include <stdio.h>
#include <stdlib.h>
#include "tokenizer.h"
#include "history.h"

int main(){
  int noexit = 1;
  while(noexit){
    char input[100];
    printf("> ");

    fgets(input, 100, stdin);

    if(*input == '0'){
      noexit = 0;
    }
    else{
      char** tokens = tokenize(input);
      print_tokens(tokens);
      free_tokens(tokens);
    }
  }
  //past output
  /*printf(space_char(' '));*/
}

这是我的主要方法，在文件 uimain.c 中。这还没有完成，但只有当我删除

char** tokens = tokenize(input);
      print_tokens(tokens);
      free_tokens(tokens);

阻止。

#include <stdio.h>
#include <stdlib.h>
#include "tokenizer.h"

int space_char(char c){
  if(c == '\t' || c == ' '){
    return 1;
  }
  return 0;
}

int non_space_char(char c){
  if(c != '\t' || c != ' '){
    return 1;
  }
  return 0;
}

char *word_start(char *str){
  int i = 0;
  while(space_char(str[i]) == 1){
    i++;
  }
  return &str[i];
}

char *word_terminator(char *word){
  word  = word_start(word);
  int i = 0;
  
  while(non_space_char(word[i]) == 1){
    i = i+1;
  }
  
  return &word[i];
}

int count_words(char *str){
  int count = 0;
  int i = 0;
  while(str[i] != '\0') {
      if(space_char(str[i]) && non_space_char(str[i + 1]))
    count++;
      i++;
    }
    count++;
  return count;
}

char  *copy_str(char *inStr, short len){
  int i = 0;
  //MALLOC FOR NEW STR :D
  char *outStr = malloc((len+1) *sizeof(char));

  while(i<=len){
    outStr[i]  = inStr[i];
    i= i+1;
  }
  return outStr;
}

char **tokenize(char *str){
  int i = 0;
  printf("%s","int i");
  int len;
  printf("%s","len");
  int all = count_words(str);
  printf("%s","all");
  char **tokens = malloc((all+1) * sizeof(char*));
  printf("%s","tokens");
  char *pointer = str;
  printf("%s","pointer");

  while(i < all+1){
    pointer = word_start(pointer);
    printf("%s","word_start");
    len = (word_terminator(pointer) - word_start(pointer));
    printf("%s","len");
    tokens[i] = copy_str(pointer, len);
    pointer = word_terminator(pointer);
    i = i + 1;
  }
  tokens[i] = 0;
  return tokens;
}

void print_tokens(char **tokens){
  int i = 0;
  while(tokens[i] !=  NULL){
    printf("%s\n", tokens[i]);
    i =  i + 1;
  }
}

void free_tokens(char **tokens){
  int i = 0;
  //can't pass  len as param  :D
  while(tokens[i] != 0){
    free(tokens[i]);
    i = i + 1;
  }
  free(tokens);
}

这是 tokenizer.c

#ifndef _TOKENIZER_
#define _TOKENIZER_


/* Return true (non-zero) if c is a whitespace characer
   ('\t' or ' ').  
   Zero terminators are not printable (therefore false) */
int space_char(char c);

/* Return true (non-zero) if c is a non-whitespace 
   character (not tab or space).  
   Zero terminators are not printable (therefore false) */ 
int non_space_char(char c);

/* Returns a pointer to the first character of the next 
   space-separated word in zero-terminated str.  Return a zero pointer if 
   str does not contain any words. */
char word_start(char *str); 

/* Returns a pointer terminator char following *word */
char *word_terminator(char *word);

/* Counts the number of words in the string argument. */
int count_words(char *str);

/* Returns a fresly allocated new zero-terminated string 
   containing <len> chars from <inStr> */
char *copy_str(char *inStr, short len);

/* Returns a freshly allocated zero-terminated vector of freshly allocated 
   space-separated tokens from zero-terminated str.
   For example, tokenize("hello world string") would result in:
     tokens[0] = "hello"
     tokens[1] = "world"
     tokens[2] = "string" 
     tokens[3] = 0
*/
char **tokenize(char* str);

/* Prints all tokens. */
void print_tokens(char **tokens);

/* Frees all tokens and the vector containing themx. */
void free_tokens(char **tokens);

#endif

这是 tokenizer.h，以防万一

Answer 1

好吧，我试着编译你的代码。

首先，在 tokenizer.h 文件中，start_word() 函数被声明为返回一个字符，但在 tokenizer.c 文件中，它被定义为返回一个 char *。根据 tokenizer.h 中的描述，将其更改为返回 char *。

/* Returns a pointer to the first character of the next 
   space-separated word in zero-terminated str.  Return a zero pointer if 
   str does not contain any words. */
char *word_start(char *str);

现在，查看 tonkenize() 文件中的 tokenizer.c 函数，printf() 函数没有正确使用。要了解如何使用 printf()，请查看 this article。

根据您要打印的变量的类型更改说明符。 "%s" 仅适用于字符串；
然后，添加 '\n'（换行符）。缓冲区中的字节只有在达到 '\n' 后才会打印；
最后，去掉变量名中的引号。在 C 中，引号总是代表字符串。

以下是一些示例：

int i = 0;
printf("%d\n", i);
int len;
printf("%d\n", len);
int all = count_words(str);
printf("%d\n", all);
// (...)

如果您不打算打印变量并且这些 printf() 调用仅用于测试，请删除第一个参数（但不要忘记 '\n' 字符）。

int i = 0;
printf("i\n");
int len;
printf("len\n");
int all = count_words(str);
printf("all\n");
// (...)

在这些 printf() 调用之后，使用 malloc() 分配内存。

char **tokens = malloc((all+1) * sizeof(char*));

如果 tokens 变量代表一个以空字符结尾的字符串数组，而变量 all 代表变量 tokens 所拥有的字符串数量，那么它是正确的。问题是你没有检查内存分配是否成功。

char **tokens = malloc((all+1) * sizeof(char*));
if (tokens == NULL) {
    fprintf(stderr, "error: allocating memory\n");
    return NULL;
}

现在，关于 while 循环。

您首先获取单词的开头，然后计算长度，再次调用 word_start()，但您已经知道单词的开头；
然后，您复制字符串并移动到单词的末尾，再次调用 word_terminator() 函数而不是仅存储先前的结果。注意word的结尾是下一次迭代的开始；
i = i + 1; 指令可以替换为 i++;;
迭代范围必须是从零到字符串数减一。

这是我对 tokenize() 函数的建议。

char **tokenize(char *str) {
    int i = 0;
    int all = count_words(str);
    char **tokens = malloc((all+1) * sizeof(char*));
    if (tokens == NULL) {
        fprintf(stderr, "error: allocating memory\n");
        return NULL;
    }
    char *start = str, *end = str;
    while (i < all) {
        start = word_start(end);
        end = word_terminator(start);
        tokens[i] = copy_str(start, end - start);
        i++;
    }
    tokens[i] = NULL;
    return tokens;
}

现在，关于 copy_str() 函数。

您再次忘记检查内存分配是否成功；
while 循环的范围是从零到字符串长度减一；
您在返回之前忘记以空字符结尾。

这是我对 copy_str() 函数的建议。

char *copy_str(char *inStr, short len) {
    int i = 0;
    char *outStr = malloc((len+1) *sizeof(char));
    if (outStr == NULL) {
        fprintf(stderr, "error: allocating memory\n");
        return NULL;
    }
    while (i < len) {
        outStr[i] = inStr[i];
        i++;
    }
    outStr[i] = '\0';
    return outStr;
}

查看word_start()和word_terminator()的实现方式，我们可以清楚地了解到，如果我调用word_start(" ");或word_terminator("asdfg");，会发生分段错误。那是因为您只检查是否分别到达了非空格/空格字符。您需要检查 '\0'（空字符）以在字符串末尾中断循环。

char *word_start(char *str) {
    int i = 0;
    while (space_char(str[i]) == 1) {
        if ( str[i] == '\0' )
            return NULL;
        i++;
    }
    return &str[i];
}

char *word_terminator(char *word) {
    word  = word_start(word);
    if ( word == NULL )
        return NULL;
    int i = 0;
    while (non_space_char(word[i]) == 1) {
        if ( word[i] == '\0' )
            return &word[i-1];
        i++;
    }
    return &word[i];
}

再次编译程序后，结果如下。

$ ./uimain
> Miguel Carvalho 22
Miguel Carvalho 22


> ^C
$

虽然程序有效，但代码可以改进。这是一些注意事项。

C 标准库包含一些对这个程序有用的函数。搜索 ctype.h 和 string.h；
count_words() 函数计数不正确。不要在循环中使用 space_char() 和 non_space_char() 函数，而是尝试使用 word_start() 和 word_terminator() 函数，直到返回 NULL。

分段错误（核心转储）？

1 个答案: