
时间:2016-05-08 14:19:28

标签: awk sed


echo 'Hello (world)' | sed 's/(/(40)/g;s/)/(41)/g'

这段代码的输出是 Hello (40(41)world(41),不符合预期,因为第二个替换还会处理第一个替换的输出。有什么方法可以跳过已经替换过的字符,或者在这里做条件分支?我不想使用临时占位符(因为输入序列可能包含任何内容)。

4 个答案:

答案 0 :(得分:1)


$ echo 'Hello (world)' | sed 's/(/(40\n/g; s/)/(41)/g; s/\n/)/g'
Hello (40)world(41)


鉴于您发布的答案(the answer you posted),这可能才是您真正想要的(使用 GNU awk 提供的 ord()、多字符 RS 以及 RT):

$ cat tst.awk
@load "ordchr"
# Rewrite every bracket character in the input as "(<character code>)",
# e.g. "(" becomes "(40)". The code is obtained from ord(), which comes
# from the ordchr extension loaded above.
BEGIN {
    ORS = ""            # print records back-to-back, adding nothing between them
    RS = "[][(){}]"     # any single bracket character terminates a record
}
{
    # RT holds the text that matched RS for this record. It is empty when
    # the record was not ended by a bracket, in which case nothing is appended.
    suffix = ""
    if (RT != "")
        suffix = "(" ord(RT) ")"
    print $0 suffix
}

$ echo 'Hello (world) foo [bar] other {stuff} etc.' | awk -f tst.awk
Hello (40)world(41) foo (91)bar(93) other (123)stuff(125) etc.


$ cat tst.awk
    # Rewrite every bracket character in the input as "(<character code>)",
    # e.g. "(" becomes "(40)", using a precomputed lookup table instead of
    # an ord() function. Relies on a regexp RS and on RT being set to the
    # text that matched RS.
    BEGIN {
        RS = "[][(){}]"     # any single bracket character terminates a record
        ORS = ""            # print records back-to-back, adding nothing between them
        # Build the table once: map[c] is "(" code ")" for codes 0..255.
        for (i = 0; i <= 255; i++) {
            char = sprintf("%c", i)
            map[char] = "(" i ")"
        }
    }
    # RT is empty when the record was not ended by a bracket; then the
    # record is printed unchanged, otherwise the bracket replacement follows it.
    { print $0 ( RT=="" ? "" : map[RT] ) }

$ echo 'Hello (world) foo [bar] other {stuff} etc.' | awk -f tst.awk
Hello (40)world(41) foo (91)bar(93) other (123)stuff(125) etc.



$ head -10 file1m
When (chapman) billies leave [the] street, And drouthy {neibors}, neibors, meet;
As market days are wearing late, And folk begin to [tak] the gate,
While (we) sit bousing {at} the nappy, An' getting [fou] and unco happy,
We think na on the [lang] Scots (miles), The mosses, {waters}, slaps and stiles,
That lie between us and our hame, Where sits our sulky, sullen dame,
Gathering her [brows] like gathering storm, (Nursing) her wrath to keep it warm.
This truth fand honest Tam o' Shanter,
As he frae Ayr ae night did canter:
(Auld Ayr, wham ne'er a town surpasses,
For honest men and bonie lasses).


$ wc file1m
 1000000 10500000 60400000 file1m


$ time sed 's/(/(40\n/g; s/)/(41)/g; s/\n/)/g; s/\[/(91)/g; s/\]/(93)/g; s/{/(123)/g; s/}/(125)/g;' file1m > sed.out
real    0m7.488s
user    0m7.378s
sys     0m0.093s

$ cat function.awk
@load "ordchr"
# Rewrite every bracket character in the input as "(<character code>)",
# e.g. "(" becomes "(40)". The code is obtained from ord(), which comes
# from the ordchr extension loaded above.
BEGIN {
    ORS = ""            # print records back-to-back, adding nothing between them
    RS = "[][(){}]"     # any single bracket character terminates a record
}
{
    # RT holds the text that matched RS for this record. It is empty when
    # the record was not ended by a bracket, in which case nothing is appended.
    suffix = ""
    if (RT != "")
        suffix = "(" ord(RT) ")"
    print $0 suffix
}

$ time awk -f function.awk file1m > awk_function.out
real    0m7.426s
user    0m7.269s
sys     0m0.155s

$ cat array.awk
    # Rewrite every bracket character in the input as "(<character code>)",
    # e.g. "(" becomes "(40)", using a precomputed lookup table instead of
    # an ord() function. Relies on a regexp RS and on RT being set to the
    # text that matched RS.
    BEGIN {
        RS = "[][(){}]"     # any single bracket character terminates a record
        ORS = ""            # print records back-to-back, adding nothing between them
        # Build the table once: map[c] is "(" code ")" for codes 0..255.
        for (i = 0; i <= 255; i++) {
            char = sprintf("%c", i)
            map[char] = "(" i ")"
        }
    }
    # RT is empty when the record was not ended by a bracket; then the
    # record is printed unchanged, otherwise the bracket replacement follows it.
    { print $0 ( RT=="" ? "" : map[RT] ) }

$ time awk -f array.awk file1m > awk_array.out
real    0m4.758s
user    0m4.648s
sys     0m0.092s


$ head -10 sed.out
When (40)chapman(41) billies leave (91)the(93) street, And drouthy (123)neibors(125), neibors, meet;
As market days are wearing late, And folk begin to (91)tak(93) the gate,
While (40)we(41) sit bousing (123)at(125) the nappy, An' getting (91)fou(93) and unco happy,
We think na on the (91)lang(93) Scots (40)miles(41), The mosses, (123)waters(125), slaps and stiles,
That lie between us and our hame, Where sits our sulky, sullen dame,
Gathering her (91)brows(93) like gathering storm, (40)Nursing(41) her wrath to keep it warm.
This truth fand honest Tam o' Shanter,
As he frae Ayr ae night did canter:
(40)Auld Ayr, wham ne'er a town surpasses,
For honest men and bonie lasses(41).
$ wc sed.out
 1000000 10500000 68800000 sed.out
$ diff sed.out awk_function.out
$ diff sed.out awk_array.out

答案 1 :(得分:0)


#! /bin/sh
#
# Filter: copy stdin to stdout, replacing each of ( ) [ ] { } with
# "(<decimal character code>)", e.g. "Hello (world)" -> "Hello (40)world(41)".
#
# Each input line is scanned left to right exactly once, so text produced
# by one replacement is never examined again. Line breaks are preserved.
# Only match()/substr() are used, so no GNU awk extensions are required.
#
# NOTE: the awk program is inside single quotes; keep apostrophes out of it.

awk '
    BEGIN    { _ord_init() }

    # Fill _ord_[] so that _ord_[character] is its numeric code.
    # The first two branches probe how sprintf("%c", n) maps numbers to
    # characters on this awk and choose which range of codes to tabulate;
    # the last branch tabulates 0..255.
    function _ord_init(    low, high, i, t) {
        low = sprintf("%c", 7) # BEL is ascii 7
        if (low == "\a") {
            low = 0
            high = 127
        } else if (sprintf("%c", 128 + 7) == "\a") {
            low = 128
            high = 255
        } else {
            low = 0
            high = 255
        }
        for (i = low; i <= high; i++) {
            t = sprintf("%c", i)
            _ord_[t] = i
        }
    }

    # Return the numeric code of the first character of str.
    function ord(str,    c) {
        c = substr(str, 1, 1)
        return _ord_[c]
    }

    {
        rest = $0       # part of the line not yet scanned
        out = ""        # converted text accumulated so far
        # Consume the line up to and including each bracket in turn, so the
        # digits and parentheses just appended to out are never rescanned.
        while (match(rest, /[][(){}]/)) {
            out = out substr(rest, 1, RSTART - 1) "(" ord(substr(rest, RSTART, 1)) ")"
            rest = substr(rest, RSTART + 1)
        }
        # print (not printf "%s") so every input line keeps its newline.
        print out rest
    }
'

答案 2 :(得分:0)


perl -pe 'BEGIN { %h = ("(" => "(40)", ")" => "(41)" ); 
        $r = join("|", map { quotemeta } keys %h); }

答案 3 :(得分:0)


$ echo 'Hello (world)' | perl -pe 's/\(/(40)/g; s/(?<!\(40)\)/(41)/g'
Hello (40)world(41)