pcre2条件替换正则表达式

时间:2017-10-21 18:48:33

标签: c regex pcre

我正在尝试编写一些正则表达式来插入基于条件的框图字符,但我不断得到编译错误子模式名称。

这是我的代码:

// Scan pkginfo line by line and copy every line that does NOT start with one
// of the excluded field names (and is not blank) into a 4096-byte buffer
// allocated here and handed back through *pkgdetail.
//
// pkgdetail: out-parameter; receives the malloc'd buffer.
// pkginfo:   NUL-terminated text to scan.
// Returns 1 when either pattern fails to compile, 0 otherwise.
//
// NOTE(review): nothing in this function frees *pkgdetail, so the caller
// presumably owns it -- confirm against the call site.
int match_pkg_details(char **pkgdetail, char *pkginfo)
{
    // Negative lookahead at the start of each line: reject lines beginning
    // with any listed field name, and lines made only of spaces.
    PCRE2_SPTR pattern = (PCRE2_SPTR)"^(?!Name|Architecture|URL|Licenses|"\
                    "Installed Size|Packager|Build Date|"\
                    "Install Date|Install Script|Validated By| *$).*$";
    // NOTE(review): the malloc result is not checked and the buffer is not
    // initialised before it is first read below.
    *pkgdetail = malloc(4096); // FIXME malloc in initializer
    char *worker = *pkgdetail;  // write cursor into *pkgdetail
    size_t pattern_length = strlen((char *)pattern);
    int errornumber;
    PCRE2_SIZE erroroffset;
    // PCRE2_MULTILINE so that ^ and $ apply to every line of the subject.
    pcre2_code *regex = pcre2_compile(
            pattern,
            pattern_length,
            PCRE2_MULTILINE,
            &errornumber,
            &erroroffset,
            NULL);
    if (regex == NULL)
    {
        PCRE2_UCHAR buffer[256];
        pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
        printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
            buffer);
        return 1;
    }

    // NOTE(review): despite its name, this string is compiled as a second
    // *pattern* below; it is not passed as a replacement string. According to
    // the surrounding question, compiling it is what reports the subpattern
    // name error.
    PCRE2_SPTR replacement = (PCRE2_SPTR)"(?(?=^Install Reason) a | ((?=(\\w) b | ((?=(\\s) c )))))";
                                                                                    // if starts with Install Reason replace with bottom line arrow }}}
    size_t replacement_length = strlen((char*)replacement);
    pcre2_code *replacement_regex = pcre2_compile(
            replacement,
            replacement_length,
            PCRE2_EXTENDED,
            &errornumber,
            &erroroffset,
            NULL);
    if (replacement_regex == NULL)
    {
        // NOTE(review): `regex` and *pkgdetail are not released on this path.
        PCRE2_UCHAR buffer[256];
        pcre2_get_error_message(errornumber, buffer, sizeof(buffer));
        printf("PCRE2 compilation failed at offset %d: %s\n", (int)erroroffset,
               buffer);
        return 1;
    }
    pcre2_match_data *match_data =
            pcre2_match_data_create_from_pattern(regex, NULL);

    PCRE2_SPTR subject = (PCRE2_SPTR)pkginfo;
    size_t length = strlen((char *)subject);

    // ovector[2*i] / ovector[2*i+1] are used below as the start / end offsets
    // of group i; ovector[1] is seeded with 0 before the first match.
    PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
    ovector[1] = 0;

    int rc;
    PCRE2_SIZE offset = 0;
    // These options apply to the first pcre2_match call only; they are reset
    // to 0 inside the loop.
    uint32_t options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
    // The loop keeps running while pcre2_match returns any non-zero value,
    // which includes negative error codes.
    // NOTE(review): `length - 1` is unsigned arithmetic; with an empty
    // subject it wraps around instead of going negative.
    while (offset < length - 1 && (rc =
         pcre2_match(regex, subject, length, offset, options, match_data, NULL)))
    {
        // Resume the next search from the end offset stored in ovector[1].
        offset = ovector[1];
        options = 0;

        if (rc == PCRE2_ERROR_NOMATCH)
        {
            // Bump the stored end offset so a later `offset = ovector[1]`
            // moves forward by one code unit.
            ovector[1] = offset + 1;
            continue;
        }

        // One iteration per ovector pair reported by pcre2_match.
        for (int i = 0; i < rc; i++)
        {
            // NOTE(review): worker_len is never used, and on the first pass
            // strlen() reads the buffer before anything was written to it.
            PCRE2_SIZE worker_len = strlen(worker);
            PCRE2_UCHAR output[4096];
            PCRE2_SIZE outlen;
            // NOTE(review): outlen is passed without being set to the size of
            // `output`, and rs / output / outlen are not used afterwards, so
            // this substitution has no effect on what is stored in *pkgdetail.
            int rs = pcre2_substitute(
                    replacement_regex,
                    subject,
                    length,
                    offset,
                    PCRE2_SUBSTITUTE_EXTENDED,
                    NULL,
                    NULL,
                    (PCRE2_SPTR)"@",
                    1,
                    output,
                    &outlen);
            PCRE2_SPTR substring_start = subject + ovector[2*i];
            size_t substring_length = ovector[2*i+1] - ovector[2*i];
            // Append the matched text plus a newline at the cursor.
            // NOTE(review): the size limit stays 4096 even after `worker` has
            // advanced, so it no longer reflects the space actually left.
            snprintf(worker, 4096, "%.*s\n", (int)substring_length, (char*)substring_start);
            // Step past the text and the newline; the next write overwrites
            // the terminating NUL.
            worker += (int)substring_length + 1;
        }
    }

    // NOTE(review): replacement_regex is not freed here.
    pcre2_match_data_free(match_data);
    pcre2_code_free(regex);
    return 0;
}

我正在匹配的字符串:

Name            : cinnamon 
Version         : 3.4.6-1 
Description     : Linux desktop which provides advanced innovative features and 
                  a traditional user experience 
Architecture    : x86_64 
URL             : https://github.com/linuxmint/Cinnamon 
Licenses        : GPL2 
Groups          : None 
Provides        : None 
Depends On      : accountsservice  caribou  cinnamon-settings-daemon  
                  cinnamon-session cinnamon-translations  cjs  clutter-gtk 
                  gnome-backgrounds  gnome-themes-standard  gstreamer  
                  libgnome-keyring  libkeybinder3  librsvg  muffin  
                  python2-cairo  python-dbus  python2-dbus  python2-pillow  
                  python2-pam  python2-pexpect  python2-pyinotify  python2-lxml  
                  cinnamon-control-center  cinnamon-screensaver  cinnamon-menus                   
                  libgnomekbd  network-manager-applet  nemo  polkit-gnome  xapps  
                  python2-gobject 
Optional Deps   : blueberry: Bluetooth support [installed]
                  gnome-panel: fallback mode
                  metacity: fallback mode
                  system-config-printer: printer settings [installed] 
Required By     : cinnamon-sound-effects 
Optional For    : None
Conflicts With  : None 
Replaces        : None 
Installed Size  : 8.31 MiB 
Packager        : Antonio Rojas <arojas@archlinux.org> 
Build Date      : Sat 09 Sep 2017 05:38:21 AM CDT 
Install Date    : Sat 09 Sep 2017 11:37:44 AM CDT 
Install Reason  : Installed as a dependency for another package 
Install Script  : No 
Validated By    : Signature

目前,如果我删除替换组,我会:

Version         : 3.4.6-1
Description     : Linux desktop which provides advanced innovative features
                    and a traditional user experience
Provides        : None
Depends On      : accountsservice  caribou  cinnamon-settings-daemon
                  cinnamon-session  cinnamon-translations  cjs  clutter-gtk  gnome-backgrounds
                  gnome-themes-standard  gstreamer  libgnome-keyring  libkeybinder3  librsvg
                  muffin  python2-cairo  python-dbus  python2-dbus  python2-pillow  python2-pam
                  python2-pexpect  python2-pyinotify  python2-lxml  cinnamon-control-center
                  cinnamon-screensaver  cinnamon-menus  libgnomekbd  network-manager-applet
                  nemo  polkit-gnome  xapps  python2-gobject
Optional Deps   : blueberry: Bluetooth support [installed]
Required By     : cinnamon-sound-effects
Optional For    : None
Conflicts With  : None
Replaces        : None
Install Reason  : Installed as a dependency for another package

预期输出如下:

├─ Version         : 3.4.6-1
├─ Description     : Linux desktop which provides advanced innovative features
│                    and a traditional user experience
├─ Provides        : None
├─ Depends On      : accountsservice  caribou  cinnamon-settings-daemon
│                    cinnamon-session  cinnamon-translations  cjs  clutter-gtk  gnome-backgrounds
│                    gnome-themes-standard  gstreamer  libgnome-keyring  libkeybinder3  librsvg
│                    muffin  python2-cairo  python-dbus  python2-dbus  python2-pillow  python2-pam
│                    python2-pexpect  python2-pyinotify  python2-lxml  cinnamon-control-center
│                    cinnamon-screensaver  cinnamon-menus  libgnomekbd  network-manager-applet
│                    nemo  polkit-gnome  xapps  python2-gobject
├─ Optional Deps   : blueberry: Bluetooth support [installed]
├─ Required By     : cinnamon-sound-effects
├─ Optional For    : None
├─ Conflicts With  : None
├─ Replaces        : None
└─ Install Reason  : Installed as a dependency for another package

a,b和c仅用于测试目的(我想我应该用命名的捕获组替换它们)。一旦我让替换工作正常,我将打破regex_compile部分的自己的方法。如何使用pcre2_substitute替换命名组?

1 个答案:

答案 0 :(得分:1)

你试图在错误的地方做你的逻辑。您需要在替换模式中处理它,而不是在正则表达式模式中处理它。

首先,让我们编写一个模式来识别字符串的不同部分:

^(?:
    (?<remove>(?:
        Name|Architecture|URL|Licenses|
        Installed[ ]Size|Packager|Build[ ]Date|
        Install[ ]Date|Install[ ]Script|Validated[ ]By
    )\s*:[^\n]*\n)
    |(?<last>(?=Install[ ]Reason\s*:))
    |(?<field>(?=\S))
    |(?<cont>(?=\s))
)

Demo

使用mx选项(PCRE2_MULTILINE | PCRE2_EXTENDED),但我们在C代码中确实不需要PCRE2_EXTENDED

这将识别字符串的某些部分,并在结果中填充一个命名的捕获组:

  • remove要删除的部分
  • 最后一个字段
  • last
  • 其他字段
  • field
  • cont表示值继续(没有字段标签的行)

接下来,我们必须用不同的字符串替换每个部分:

  • remove =&gt; (空字符串)
  • last =&gt; └─(我将在下面的程序中使用\-
  • field =&gt; ├─(我将在下面的程序中使用+-
  • cont =&gt; (我将在下面的程序中使用|

我们可以让PCRE通过PCRE2_SUBSTITUTE_EXTENDEDdocs)处理该问题:

  

设置PCRE2_SUBSTITUTE_EXTENDED的第二个效果是为群组替换增加更多灵活性。语法类似于Bash使用的语法:

${<n>:-<string>}
${<n>:+<string1>:<string2>}
     

和以前一样,<n>可以是组号或名称。第一种形式指定默认值。如果设置了组<n>,则插入其值;如果不是,则展开<string>并插入结果。第二种形式指定分别设置或取消设置组<n>时展开和插入的字符串。第一种形式只是

的便捷简写
${<n>:+${<n>}:<string>}

因此,使用该语法,我们的替换字符串如下所示:

${remove:+:${last:+\\- :${field:++- :${cont:+|  :}}}}

这是一个完整的演示:

#include <stdio.h>

#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>

PCRE2_SPTR input =
    "Name            : cinnamon\n"
    "Version         : 3.4.6-1\n"
    "Description     : Linux desktop which provides advanced innovative features and\n"
    "                  a traditional user experience\n"
    "Architecture    : x86_64\n"
    "URL             : https://github.com/linuxmint/Cinnamon\n"
    "Licenses        : GPL2\n"
    "Groups          : None\n"
    "Provides        : None\n"
    "Depends On      : accountsservice  caribou  cinnamon-settings-daemon\n"
    "                  cinnamon-session cinnamon-translations  cjs  clutter-gtk\n"
    "                  gnome-backgrounds  gnome-themes-standard  gstreamer \n"
    "                  libgnome-keyring  libkeybinder3  librsvg  muffin \n"
    "                  python2-cairo  python-dbus  python2-dbus  python2-pillow\n"
    "                  python2-pam  python2-pexpect  python2-pyinotify  python2-lxml\n"
    "                  cinnamon-control-center  cinnamon-screensaver  cinnamon-menus\n"
    "                  libgnomekbd  network-manager-applet  nemo  polkit-gnome  xapps\n"
    "                  python2-gobject\n"
    "Optional Deps   : blueberry: Bluetooth support [installed]\n"
    "                  gnome-panel: fallback mode\n"
    "                  metacity: fallback mode\n"
    "                  system-config-printer: printer settings [installed]\n"
    "Required By     : cinnamon-sound-effects\n"
    "Optional For    : None\n"
    "Conflicts With  : None\n"
    "Replaces        : None\n"
    "Installed Size  : 8.31 MiB\n"
    "Packager        : Antonio Rojas <arojas@archlinux.org>\n"
    "Build Date      : Sat 09 Sep 2017 05:38:21 AM CDT\n"
    "Install Date    : Sat 09 Sep 2017 11:37:44 AM CDT\n"
    "Install Reason  : Installed as a dependency for another package\n"
    "Install Script  : No\n"
    "Validated By    : Signature\n";

PCRE2_SPTR pattern =
    "^(?:"
        "(?<remove>(?:"
            "Name|Architecture|URL|Licenses|"
            "Installed Size|Packager|Build Date|"
            "Install Date|Install Script|Validated By"
        ")\\s*:[^\n]*\n)"
        "|(?<last>(?=Install Reason\\s*:))"
        "|(?<field>(?=\\S))"
        "|(?<cont>(?=\\s))"
    ")";

PCRE2_SPTR replacement =
    "${remove:+:${last:+\\\\- :${field:++- :${cont:+|  :}}}}";

static void print_error(int code)
{
    PCRE2_UCHAR message[256];
    if (pcre2_get_error_message(code, &message, sizeof(message) / sizeof(PCRE2_UCHAR)))
        puts(message);
}

/*
 * Demo driver: compiles `pattern`, runs a global extended substitution over
 * `input` with `replacement`, and prints the result.
 *
 * The substitution runs twice: first with a zero-length output buffer and
 * PCRE2_SUBSTITUTE_OVERFLOW_LENGTH so that PCRE2 reports the required size in
 * `outlength`, then again into a buffer of exactly that size.
 *
 * Returns 0 on success and 1 on any failure; every resource acquired so far
 * is released on all paths.
 */
int main(void)
{
    int status = 1;
    int result, error;
    PCRE2_SIZE erroffset;
    PCRE2_SIZE outlength = 0;
    PCRE2_UCHAR *outbuf = NULL;
    pcre2_match_context *match_context = NULL;
    pcre2_code *re;

    re = pcre2_compile(pattern, PCRE2_ZERO_TERMINATED, PCRE2_MULTILINE,
                       &error, &erroffset, NULL);
    if (!re)
    {
        print_error(error);
        return 1;
    }

    match_context = pcre2_match_context_create(NULL);

    /* Sizing pass: no output buffer yet, so the only acceptable outcome is
     * PCRE2_ERROR_NOMEMORY with the needed length stored in outlength. */
    result = pcre2_substitute(
        re,
        input,
        PCRE2_ZERO_TERMINATED,
        0,
        PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_OVERFLOW_LENGTH | PCRE2_SUBSTITUTE_EXTENDED,
        NULL,
        match_context,
        replacement,
        PCRE2_ZERO_TERMINATED,
        NULL,
        &outlength
    );

    if (result != PCRE2_ERROR_NOMEMORY)
    {
        print_error(result);
        goto cleanup;
    }

    outbuf = malloc(outlength * sizeof *outbuf);
    if (!outbuf)
    {
        print_error(PCRE2_ERROR_NOMEMORY);
        goto cleanup;
    }

    /* Real pass into the correctly sized buffer. */
    result = pcre2_substitute(
        re,
        input,
        PCRE2_ZERO_TERMINATED,
        0,
        PCRE2_SUBSTITUTE_GLOBAL | PCRE2_SUBSTITUTE_EXTENDED,
        NULL,
        match_context,
        replacement,
        PCRE2_ZERO_TERMINATED,
        outbuf,
        &outlength
    );

    if (result < 0)
    {
        print_error(result);
        goto cleanup;
    }

    /* The 8-bit code units are printed as ordinary chars. */
    puts((const char *)outbuf);
    status = 0;

cleanup:
    free(outbuf);
    pcre2_match_context_free(match_context);
    pcre2_code_free(re);

    return status;
}

输出结果为:

+- Version         : 3.4.6-1
+- Description     : Linux desktop which provides advanced innovative features and
|                    a traditional user experience
+- Groups          : None
+- Provides        : None
+- Depends On      : accountsservice  caribou  cinnamon-settings-daemon
|                    cinnamon-session cinnamon-translations  cjs  clutter-gtk
|                    gnome-backgrounds  gnome-themes-standard  gstreamer
|                    libgnome-keyring  libkeybinder3  librsvg  muffin
|                    python2-cairo  python-dbus  python2-dbus  python2-pillow
|                    python2-pam  python2-pexpect  python2-pyinotify  python2-lxml
|                    cinnamon-control-center  cinnamon-screensaver  cinnamon-menus
|                    libgnomekbd  network-manager-applet  nemo  polkit-gnome  xapps
|                    python2-gobject
+- Optional Deps   : blueberry: Bluetooth support [installed]
|                    gnome-panel: fallback mode
|                    metacity: fallback mode
|                    system-config-printer: printer settings [installed]
+- Required By     : cinnamon-sound-effects
+- Optional For    : None
+- Conflicts With  : None
+- Replaces        : None
\- Install Reason  : Installed as a dependency for another package

我想我应该提一下,在你的情况下,手动执行字符串操作肯定会更容易,而不是通过正则表达式模式。