文件操作:删除某个字符串的所有出现,但保留特定模式之后的第一次出现

时间:2019-04-03 12:50:40

标签: linux awk sed grep

我想删除文件中多次出现的行,但是要保留某些行。我该怎么办?

这是我文件的一部分,我想更改:

§M: 1, K: 2
    name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,     task, scale,  IPC,  CPU,  GHz 
Fastor2D, 0.00, 88378.00, 23272.00,  646.00,  1525.00,  241.00, 17447.00,     1, 0.26, 4.71, 5.07 
    name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz 
Fastor2D, 0.00, 32083.00, 17690.00,  148.00,    28.00,  116.00, 6437.00,     1, 0.55, 25.96, 4.98 
    name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz 
Fastor2D, 0.00, 31342.00, 17690.00,  161.00,    23.00,  100.00, 6125.00,     1, 0.56, 36.46, 5.12 

   name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz 
Blitz2D, 0.00, 33418.00, 17933.00,  168.00,    83.00,  109.00, 6473.00,     1, 0.54, 12.50, 5.16 

     name, time,     cycle,    instr, L1-miss, LLC-miss, br-miss,     task, scale,  IPC,  CPU,  GHz 
XTensor2D, 0.00, 176180.00, 60830.00, 1586.00,  3040.00,  546.00, 36306.00,     1, 0.35, 1.21, 4.85 
     name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,  CPU,  GHz 
XTensor2D, 0.00, 39599.00, 21641.00,  258.00,    41.00,  169.00, 7962.00,     1, 0.55, 4.21, 4.97 
     name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,  CPU,  GHz 
XTensor2D, 0.00, 36483.00, 21641.00,  261.00,     5.00,  110.00, 7323.00,     1, 0.59, 5.84, 4.98 

   name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,  CPU,  GHz 
Eigen2D, 0.00, 47271.00, 24427.00,  296.00,   298.00,  143.00, 9354.00,     1, 0.52, 2.81, 5.05 
   name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz 
Eigen2D, 0.00, 31979.00, 18101.00,  166.00,    19.00,  100.00, 6313.00,     1, 0.57, 16.23, 5.07 
   name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz 
Eigen2D, 0.00, 32383.00, 18101.00,  175.00,    38.00,   96.00, 6513.00,     1, 0.56, 18.09, 4.97 




§M: 2, K: 2
    name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz 
Fastor2D, 0.00, 31548.00, 17689.00,  149.00,    20.00,  101.00, 6112.00,     1, 0.56, 33.77, 5.16 
    name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz 
Fastor2D, 0.00, 31404.00, 17689.00,  161.00,     0.00,   95.00, 6105.00,     1, 0.56, 41.82, 5.14 
    name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz 
Fastor2D, 0.00, 31060.00, 17689.00,  167.00,     8.00,   93.00, 6145.00,     1, 0.57, 38.41, 5.05 

   name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz 
Blitz2D, 0.00, 32130.00, 18122.00,  155.00,    47.00,  113.00, 6359.00,     1, 0.56, 16.87, 5.05 

     name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,  CPU,  GHz 
XTensor2D, 0.00, 42370.00, 22410.00,  245.00,   105.00,  193.00, 8393.00,     1, 0.53, 3.53, 5.05 
     name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,  CPU,  GHz 
XTensor2D, 0.00, 37526.00, 21853.00,  264.00,    26.00,  119.00, 7474.00,     1, 0.58, 5.25, 5.02 
     name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,  CPU,  GHz 
XTensor2D, 0.00, 37091.00, 21853.00,  247.00,    22.00,  107.00, 7377.00,     1, 0.59, 5.74, 5.03 

   name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,     task, scale,  IPC,  CPU,  GHz 
Eigen2D, 0.00, 55863.00, 25323.00,  321.00,   434.00,  166.00, 11266.00,     1, 0.45, 2.17, 4.96 
   name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,  CPU,  GHz 
Eigen2D, 0.00, 34626.00, 18984.00,  198.00,    47.00,  111.00, 6661.00,     1, 0.55, 9.93, 5.20 
   name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz 
Eigen2D, 0.00, 32862.00, 18984.00,  191.00,    17.00,   97.00, 6572.00,     1, 0.58, 11.99, 5.00 

此模式会一直重复下去。我想要的是:在每个 §M: *, K: * 行之后,保留紧随其后的第一行 name, ...,并删除其后出现的所有 name, ... 行,直到下一个 §M: *, K: * 行再次出现。

最后,它应该看起来像这样:

§M: 1, K: 2
    name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,     task, scale,  IPC,  CPU,  GHz 
Fastor2D, 0.00, 88378.00, 23272.00,  646.00,  1525.00,  241.00, 17447.00,     1, 0.26, 4.71, 5.07 
Fastor2D, 0.00, 32083.00, 17690.00,  148.00,    28.00,  116.00, 6437.00,     1, 0.55, 25.96, 4.98 
Fastor2D, 0.00, 31342.00, 17690.00,  161.00,    23.00,  100.00, 6125.00,     1, 0.56, 36.46, 5.12 

Blitz2D, 0.00, 33418.00, 17933.00,  168.00,    83.00,  109.00, 6473.00,     1, 0.54, 12.50, 5.16 

XTensor2D, 0.00, 176180.00, 60830.00, 1586.00,  3040.00,  546.00, 36306.00,     1, 0.35, 1.21, 4.85 
XTensor2D, 0.00, 39599.00, 21641.00,  258.00,    41.00,  169.00, 7962.00,     1, 0.55, 4.21, 4.97 
XTensor2D, 0.00, 36483.00, 21641.00,  261.00,     5.00,  110.00, 7323.00,     1, 0.59, 5.84, 4.98 

Eigen2D, 0.00, 47271.00, 24427.00,  296.00,   298.00,  143.00, 9354.00,     1, 0.52, 2.81, 5.05 
Eigen2D, 0.00, 31979.00, 18101.00,  166.00,    19.00,  100.00, 6313.00,     1, 0.57, 16.23, 5.07 
Eigen2D, 0.00, 32383.00, 18101.00,  175.00,    38.00,   96.00, 6513.00,     1, 0.56, 18.09, 4.97 




§M: 2, K: 2
    name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz 
Fastor2D, 0.00, 31548.00, 17689.00,  149.00,    20.00,  101.00, 6112.00,     1, 0.56, 33.77, 5.16 
Fastor2D, 0.00, 31404.00, 17689.00,  161.00,     0.00,   95.00, 6105.00,     1, 0.56, 41.82, 5.14 
Fastor2D, 0.00, 31060.00, 17689.00,  167.00,     8.00,   93.00, 6145.00,     1, 0.57, 38.41, 5.05 

Blitz2D, 0.00, 32130.00, 18122.00,  155.00,    47.00,  113.00, 6359.00,     1, 0.56, 16.87, 5.05 

XTensor2D, 0.00, 42370.00, 22410.00,  245.00,   105.00,  193.00, 8393.00,     1, 0.53, 3.53, 5.05 
XTensor2D, 0.00, 37526.00, 21853.00,  264.00,    26.00,  119.00, 7474.00,     1, 0.58, 5.25, 5.02 
XTensor2D, 0.00, 37091.00, 21853.00,  247.00,    22.00,  107.00, 7377.00,     1, 0.59, 5.74, 5.03 

Eigen2D, 0.00, 55863.00, 25323.00,  321.00,   434.00,  166.00, 11266.00,     1, 0.45, 2.17, 4.96 
Eigen2D, 0.00, 34626.00, 18984.00,  198.00,    47.00,  111.00, 6661.00,     1, 0.55, 9.93, 5.20 
Eigen2D, 0.00, 32862.00, 18984.00,  191.00,    17.00,   97.00, 6572.00,     1, 0.58, 11.99, 5.00 

是否有可以实现这一点的 sed 或 awk 命令?如果可以,请解释一下您的命令中每一部分的作用,我想弄明白其原理。先谢谢了。

3 个答案:

答案 0 :(得分:2)

将您的输入视为一系列记录(每条记录的第一行以 § 开头)。这样只需打印每条记录的第二行(即该记录中的第一个 name 行),以及所有不以 name 开头的行:

$ awk '/^§/{off=NR-1} (NR-off==2) || !/^ *name/' file
§M: 1, K: 2
    name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,     task, scale,  IPC,  CPU,  GHz
Fastor2D, 0.00, 88378.00, 23272.00,  646.00,  1525.00,  241.00, 17447.00,     1, 0.26, 4.71, 5.07
Fastor2D, 0.00, 32083.00, 17690.00,  148.00,    28.00,  116.00, 6437.00,     1, 0.55, 25.96, 4.98
Fastor2D, 0.00, 31342.00, 17690.00,  161.00,    23.00,  100.00, 6125.00,     1, 0.56, 36.46, 5.12

Blitz2D, 0.00, 33418.00, 17933.00,  168.00,    83.00,  109.00, 6473.00,     1, 0.54, 12.50, 5.16

XTensor2D, 0.00, 176180.00, 60830.00, 1586.00,  3040.00,  546.00, 36306.00,     1, 0.35, 1.21, 4.85
XTensor2D, 0.00, 39599.00, 21641.00,  258.00,    41.00,  169.00, 7962.00,     1, 0.55, 4.21, 4.97
XTensor2D, 0.00, 36483.00, 21641.00,  261.00,     5.00,  110.00, 7323.00,     1, 0.59, 5.84, 4.98

Eigen2D, 0.00, 47271.00, 24427.00,  296.00,   298.00,  143.00, 9354.00,     1, 0.52, 2.81, 5.05
Eigen2D, 0.00, 31979.00, 18101.00,  166.00,    19.00,  100.00, 6313.00,     1, 0.57, 16.23, 5.07
Eigen2D, 0.00, 32383.00, 18101.00,  175.00,    38.00,   96.00, 6513.00,     1, 0.56, 18.09, 4.97




§M: 2, K: 2
    name, time,    cycle,    instr, L1-miss, LLC-miss, br-miss,    task, scale,  IPC,   CPU,  GHz
Fastor2D, 0.00, 31548.00, 17689.00,  149.00,    20.00,  101.00, 6112.00,     1, 0.56, 33.77, 5.16
Fastor2D, 0.00, 31404.00, 17689.00,  161.00,     0.00,   95.00, 6105.00,     1, 0.56, 41.82, 5.14
Fastor2D, 0.00, 31060.00, 17689.00,  167.00,     8.00,   93.00, 6145.00,     1, 0.57, 38.41, 5.05

Blitz2D, 0.00, 32130.00, 18122.00,  155.00,    47.00,  113.00, 6359.00,     1, 0.56, 16.87, 5.05

XTensor2D, 0.00, 42370.00, 22410.00,  245.00,   105.00,  193.00, 8393.00,     1, 0.53, 3.53, 5.05
XTensor2D, 0.00, 37526.00, 21853.00,  264.00,    26.00,  119.00, 7474.00,     1, 0.58, 5.25, 5.02
XTensor2D, 0.00, 37091.00, 21853.00,  247.00,    22.00,  107.00, 7377.00,     1, 0.59, 5.74, 5.03

Eigen2D, 0.00, 55863.00, 25323.00,  321.00,   434.00,  166.00, 11266.00,     1, 0.45, 2.17, 4.96
Eigen2D, 0.00, 34626.00, 18984.00,  198.00,    47.00,  111.00, 6661.00,     1, 0.55, 9.93, 5.20
Eigen2D, 0.00, 32862.00, 18984.00,  191.00,    17.00,   97.00, 6572.00,     1, 0.58, 11.99, 5.00

答案 1 :(得分:0)

使用awk:

# awk reads its input one line at a time and evaluates
# the rules below, in order, against every line.
/^§/ { f = 1 }        # a line starting with "§" opens a new section: raise the flag
/^ *name,/ && !f {    # a "name," row (optional leading spaces) while the flag is down:
  next                #   drop it and move straight on to the next input line
}
/^ *name,/ {          # a "name," row while the flag is up (the first one in its section):
  f = 0               #   lower the flag so later "name," rows in this section are dropped,
}                     #   then fall through so this row is printed by the last rule
{ print }             # every line that reaches this rule is printed unchanged

用法:

awk '/^§/{f=1} /^ *name,/{if(!f)next; f=0} 1' file

答案 2 :(得分:-1)

删除重复项的典型习惯用法是: awk '!d[$0]++'

您可以在这个习惯用法的基础上附加任意条件,这些条件可以依赖于前面已经读入的行:

awk '
  # Keep only the first "name," row after each "M: x, K: y" header; print all other lines.
  #
  # The plain  !d[$0]++  idiom is not enough for this input:
  #   - it also removes repeated blank separator lines and identical data rows;
  #   - it misses repeated "name," rows whose column padding differs.
  # So the same "first occurrence only" idiom is applied to a counter that
  # covers just the "name," rows and is reset at every header.

  # A section header: forget that a "name," row has already been printed.
  /M:.+, K:/ { names_seen = 0 }

  # Lines that are not "name," rows are always printed. For a "name," row,
  # !names_seen++ is true only the first time since the last header.
  !/^ *name,/ || !names_seen++ { print }
'
相关问题