对于字符串,查找和替换

时间:2010-12-10 11:04:12

标签: c algorithm string-search

查找一些文本并将其替换为C字符串中的新文本可能比预期的要复杂一些。 我正在寻找一种快速且时间复杂度较低的算法。

我应该使用什么?

6 个答案:

答案 0 :(得分:5)

我在C中找不到我喜欢的搜索/替换实现,所以在这里给出我自己的实现。它不使用strstr()、snprintf()、任意长度的临时缓冲区等,只要求haystack缓冲区足够大,能够容纳替换后的结果字符串。

// str_replace(haystack, haystacksize, oldneedle, newneedle) --
//  Search haystack and replace all occurrences of oldneedle with newneedle.
//  The replacement is done in place, inside the caller's haystack buffer.
//  Resulting haystack contains no more than haystacksize characters (including the '\0').
//  If haystacksize is too small to make the replacements, do not modify haystack at all.
//
// PARAMETERS
//  haystack      NUL-terminated string to edit in place.
//  haystacksize  Total capacity of the haystack buffer in bytes.
//  oldneedle     Text to search for; an empty string means "nothing to do".
//  newneedle     Text written in place of each occurrence of oldneedle.
//
// RETURN VALUES
// str_replace() returns haystack on success and NULL on failure.
// Failure means there was not enough room to replace all occurrences of oldneedle.
// Success is returned otherwise, even if no replacement is made.
char *str_replace(char *haystack, size_t haystacksize,
                    const char *oldneedle, const char *newneedle);

// ------------------------------------------------------------------
// Implementation of function
// ------------------------------------------------------------------
// Return-value shorthands. SUCCESS expands to an expression naming a
// variable called `haystack`, so it can only be used inside a function
// that has such a variable in scope. FAILURE is a null pointer.
#define SUCCESS (char *)haystack
#define FAILURE (void *)NULL

// Forward declarations of the file-local matching helpers, which are
// defined after str_replace().
// Both take the current position inside the needle (needle_ptr, updated
// by the call), the haystack character being examined (read_ptr), and
// the first/last characters of the needle; both return true when a
// complete match has just been recognised.
static bool
locate_forward(char **needle_ptr, char *read_ptr, 
        const char *needle, const char *needle_last);
static bool
locate_backward(char **needle_ptr, char *read_ptr, 
        const char *needle, const char *needle_last);

// str_replace: replace every occurrence of oldneedle in haystack with
// newneedle, editing the haystack buffer in place.
//
// Occurrences are found left to right and do not overlap: after a match
// the scan resumes at the first character following it. Each candidate
// position is tested independently, so matches that follow a repeated
// prefix (e.g. "ab" inside "aab") are found.
//
// Parameters:
//   haystack      NUL-terminated string to edit; must not overlap either needle.
//   haystacksize  capacity of the haystack buffer in bytes, including the
//                 room needed for the terminating '\0'.
//   oldneedle     text to search for; an empty string is a no-op.
//   newneedle     replacement text.
//
// Returns haystack on success (also when nothing was replaced), or NULL
// when the expanded result, including its '\0', would not fit in
// haystacksize bytes. On failure haystack is left untouched.
//
// No temporary buffer is allocated and strstr() is not used.
char *str_replace(char *haystack, size_t haystacksize,
                    const char *oldneedle, const char *newneedle)
{
    size_t oldneedle_len = strlen(oldneedle);
    size_t newneedle_len = strlen(newneedle);

    // Case 0: oldneedle is empty
    if (oldneedle_len == 0)
        return haystack;    // nothing to do; define as success

    // Case 1: newneedle is not longer than oldneedle.
    // The result never grows, so one forward pass can compact the string
    // in place: write_ptr never gets ahead of read_ptr.
    if (newneedle_len <= oldneedle_len) {
        const char *read_ptr = haystack;
        char *write_ptr = haystack;

        while (*read_ptr != '\0') {
            // strncmp stops at the haystack's '\0', so this never reads
            // past the end of the string.
            if (strncmp(read_ptr, oldneedle, oldneedle_len) == 0) {
                memcpy(write_ptr, newneedle, newneedle_len);
                write_ptr += newneedle_len;
                read_ptr += oldneedle_len;
            } else {
                *write_ptr++ = *read_ptr++;
            }
        }
        *write_ptr = '\0';
        return haystack;
    }

    // Case 2: newneedle is longer than oldneedle.
    size_t diff_len = newneedle_len - oldneedle_len; // growth per match
    size_t growth = 0;      // total extra bytes needed; kept <= haystacksize
    size_t matches = 0;     // number of occurrences found in pass 1

    // Pass 1: measure only. Nothing is written until we know it fits.
    const char *scan = haystack;
    while (*scan != '\0') {
        if (strncmp(scan, oldneedle, oldneedle_len) == 0) {
            // Bail out early (and overflow-safely) once the growth alone
            // exceeds the buffer.
            if (diff_len > haystacksize - growth)
                return NULL;
            growth += diff_len;
            matches++;
            scan += oldneedle_len;
        } else {
            scan++;
        }
    }
    size_t haystack_len = (size_t)(scan - haystack);

    if (matches == 0)
        return haystack;    // nothing to replace

    // The result needs haystack_len + growth characters plus the '\0'.
    if (haystacksize <= haystack_len ||
        growth > haystacksize - haystack_len - 1)
        return NULL;        // no room in haystack

    // Pass 2: slide the whole string (with its '\0') to the end of the
    // space the result will occupy, then rebuild from the front. The gap
    // between write_ptr and read_ptr shrinks by diff_len per replacement
    // and reaches zero exactly when the last match has been replaced, so
    // writes never overtake unread input and the remaining tail is
    // already in its final position.
    memmove(haystack + growth, haystack, haystack_len + 1);

    const char *read_ptr = haystack + growth;
    char *write_ptr = haystack;
    while (matches > 0) {
        if (strncmp(read_ptr, oldneedle, oldneedle_len) == 0) {
            memcpy(write_ptr, newneedle, newneedle_len);
            write_ptr += newneedle_len;
            read_ptr += oldneedle_len;
            matches--;
        } else {
            *write_ptr++ = *read_ptr++;
        }
    }
    return haystack;
}

// locate_forward: feed one haystack character (*read_ptr) to a forward
// matcher whose progress through the needle is stored in *needle_ptr.
//
// If the character equals the needle character currently expected, the
// matcher advances; once the character at needle_last has been matched
// the state is rewound to `needle` and true is returned. On a mismatch
// the state is rewound to `needle` and false is returned.
//
// Note: after a mismatch the current character is NOT re-tested against
// the first needle character, so a match that begins on that character
// goes undetected (for example "ab" inside "aab").
static inline bool 
locate_forward(char **needle_ptr, char *read_ptr,
        const char *needle, const char *needle_last)
{
    char *expected = *needle_ptr;

    // Mismatch: start over from the beginning of the needle.
    if (*expected != *read_ptr) {
        *needle_ptr = (char *)needle;
        return false;
    }

    // Matched the final needle character: report it and rewind.
    if (expected >= needle_last) {
        *needle_ptr = (char *)needle;
        return true;
    }

    // Partial match so far: expect the next needle character next time.
    *needle_ptr = expected + 1;
    return false;
}

// locate_backward: feed one haystack character (*read_ptr) to a matcher
// that walks the needle from its last character towards its first; the
// matcher's position in the needle is stored in *needle_ptr.
//
// If the character equals the needle character currently expected, the
// matcher steps back; once the character at `needle` has been matched
// the state is rewound to needle_last and true is returned. On a
// mismatch the state is rewound to needle_last and false is returned.
//
// Note: after a mismatch the current character is NOT re-tested against
// the last needle character.
static inline bool
locate_backward(char **needle_ptr, char *read_ptr, 
        const char *needle, const char *needle_last)
{
    char *expected = *needle_ptr;

    // Mismatch: start over from the end of the needle.
    if (*expected != *read_ptr) {
        *needle_ptr = (char *)needle_last;
        return false;
    }

    // Matched the first needle character: the whole needle has been seen.
    if (expected <= needle) {
        *needle_ptr = (char *)needle_last;
        return true;
    }

    // Partial match so far: expect the preceding needle character next.
    *needle_ptr = expected - 1;
    return false;
}

使用示例

#define BUF 30
char *retval1, *retval2;
char message[BUF] = "Your name is $USERNAME.";
char username[] = "admin";
char username_toolong[] = "System Administrator";

// Print the diagnostic for a replacement that str_replace() rejected.
static void report_no_room(const char *replacement) {
    printf("Not enough room to replace $USERNAME with `%s'\n", replacement);
}

// Demo: try a replacement that cannot fit in BUF bytes, then one that
// can, report any failures, and print the resulting message.
int main() {
    // Both replacements are attempted before anything is printed.
    retval1 = str_replace(message, BUF, "$USERNAME", username_toolong);
    retval2 = str_replace(message, BUF, "$USERNAME", username);

    if (retval1 == NULL)
        report_no_room(username_toolong);
    if (retval2 == NULL)
        report_no_room(username);

    printf("%s\n", message);
    return 0;
}

输出

  

Not enough room to replace $USERNAME with `System Administrator'
Your name is admin.

干杯。

答案 1 :(得分:2)

Knuth-Morris-Pratt(经典)或Boyer-Moore(有时更快)?

尝试使用Google搜索“字符串搜索算法”。

答案 2 :(得分:1)

使用std::string(来自<string>),您只需使用find和replace。

编辑:Touché。这仅适用于C ++。

这对你有好处吗? http://www.daniweb.com/forums/thread51976.html

答案 3 :(得分:1)

我不禁想知道strstr()实现了什么算法。鉴于这些是相当标准的算法,strstr()的良好实现完全有可能使用其中一个。

然而,无法保证strstr()实现优化算法,或者从一个平台到另一个平台使用相同的算法。

答案 4 :(得分:0)

这是一个很好的代码

#include <stdio.h>
#include <string.h>

/*
 * replace_str: replace the FIRST occurrence of 'orig' in 'str' with 'rep'.
 *
 * Returns:
 *   - 'str' itself when 'orig' does not occur in it;
 *   - a pointer to an internal static buffer holding the result otherwise
 *     (overwritten by the next call, so not reentrant);
 *   - NULL when the result, including its '\0', would not fit in the
 *     internal buffer.
 *
 * 'str' may be the pointer returned by a previous call, which allows
 * chaining; 'orig' and 'rep' must not point into that buffer.
 */
char *replace_str(char *str, char *orig, char *rep)
{
  static char buffer[4096];
  char *p = strstr(str, orig);

  if (p == NULL)  // Is 'orig' even in 'str'?
    return str;

  size_t prefix_len = (size_t)(p - str);    // text before the match
  size_t orig_len = strlen(orig);
  size_t rep_len = strlen(rep);
  size_t tail_len = strlen(p + orig_len);   // text after the match

  // Refuse to build a result that would overflow the static buffer.
  // Checked step by step so the size arithmetic itself cannot wrap.
  size_t room = sizeof buffer - 1;
  if (prefix_len > room || rep_len > room - prefix_len ||
      tail_len > room - prefix_len - rep_len)
    return NULL;

  // memmove (not memcpy/sprintf) because 'str' may already live in
  // 'buffer'. Prefix first: it only moves towards lower addresses and
  // cannot touch the tail, which is moved next together with its '\0'.
  memmove(buffer, str, prefix_len);
  memmove(buffer + prefix_len + rep_len, p + orig_len, tail_len + 1);
  memcpy(buffer + prefix_len, rep, rep_len);

  return buffer;
}

/* Demo: replace "world" with "Miami" in a greeting and print the result. */
int main(void)
{
  char *result = replace_str("Hello, world!", "world", "Miami");

  puts(result);
  return 0;
}

答案 5 :(得分:0)

我的解决方案,基于其他人,但我相信更安全一点:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define MAX_SOURCE_SIZE (0x100000)

/*
 * searchReplace: apply a sequence of single replacements to 'string'.
 *
 * For i = 0 .. numReplacements-1, the first occurrence of toReplace[i]
 * in the current text is replaced by replacements[i]; each step operates
 * on the result of the previous one.
 *
 * Parameters:
 *   string           NUL-terminated input; may be the pointer returned
 *                    by a previous call.
 *   toReplace        array of numReplacements strings to search for.
 *   replacements     array of numReplacements replacement strings.
 *   numReplacements  number of entries in both arrays (may be 0).
 *
 * Returns a pointer to an internal static buffer of MAX_SOURCE_SIZE
 * bytes holding the result (overwritten by the next call, so not
 * reentrant). With numReplacements == 0 the buffer holds a copy of
 * 'string'.
 *
 * Terminates the process with exit(EXIT_FAILURE) when a search string
 * is not found, or when the input or an intermediate result would not
 * fit in the buffer.
 */
char * searchReplace(char * string, char *toReplace[], char *replacements[], int numReplacements){
    static char buffer[MAX_SOURCE_SIZE];
    size_t len = strlen(string);    /* current text length, without '\0' */

    if (len >= sizeof buffer) {
        exit(EXIT_FAILURE);
    }
    /* memmove: 'string' may already point into 'buffer'. */
    memmove(buffer, string, len + 1);

    for (int i = 0; i < numReplacements; ++i) {
        const char *toRep = toReplace[i];
        const char *rep = replacements[i];
        char *locOfToRep = strstr(buffer, toRep);

        /* if the search string is not in the text, exit. */
        if (locOfToRep == NULL) {
            exit(EXIT_FAILURE);
        }

        size_t lenToRep = strlen(toRep);
        size_t lenRep = strlen(rep);

        /* Growing the text must not run past the end of the buffer. */
        if (lenRep > lenToRep &&
            lenRep - lenToRep > sizeof buffer - 1 - len) {
            exit(EXIT_FAILURE);
        }

        /*
         * Edit in place: shift everything after the match (including the
         * '\0') to its new position, then drop the replacement into the
         * gap. The text is both source and destination here, so the
         * shift must be an overlap-safe memmove.
         */
        char *tail = locOfToRep + lenToRep;
        size_t tailBytes = len - (size_t)(tail - buffer) + 1;
        memmove(locOfToRep + lenRep, tail, tailBytes);
        memcpy(locOfToRep, rep, lenRep);

        len = len - lenToRep + lenRep;
    }
    return buffer;
}

/* Demo: apply two replacements to a greeting and print the result. */
int main(){
    char *names[2] = {"Hello", "world"};
    char *vals[2] = {"Goodbye", "you"};
    int numVals = 2;
    char * string = searchReplace("Hello, world!", names, vals, numVals);

    printf("%s\n",string);
}