计算文本文件中的字符串数

时间:2013-05-24 07:42:50

标签: awk

我有一个包含10列的文本文件,如f.txt,如下所示:

aab abb  263-455
aab abb  263-455
aab abb  263-455
bbb abb  26-455
bbb abb  26-455
bbb aka  264-266
bga bga  230-232
bga bga  230-232

我想根据第三列的值,统计第一列和第二列中每个字符串的唯一出现次数(同一列中,同一字符串搭配相同的第三列值只计一次)。

输出:

aab - 1
abb - 2
bbb - 2
aka - 1
bga - 2

Total no - 8

5 个答案:

答案 0 :(得分:3)

这样可以解决问题:

$ awk '
    # Look at each distinct input line only once and credit the strings
    # found in its first and second columns. Duplicates are detected on
    # the whole line, so lines differing in any column count as distinct.
    !seen[$0]++ { tally[$1]++; tally[$2]++ }
    # Report every string with its tally, then the sum of all tallies.
    END {
        for (word in tally) {
            print word " - " tally[word]
            total += tally[word]
        }
        print "\nTotal No -", total
    }' file
aka - 1
bga - 2
aab - 1
abb - 2
bbb - 2

Total No - 8

以更易读的脚本形式:

# Tally the strings in columns 1 and 2, looking at each distinct input
# line only once, then print every tally followed by the grand total.
# Duplicates are detected on the whole line ($0).

# A line that was already recorded contributes nothing more.
($0 in seen) { next }

# First sighting of this line: remember it and credit both of its strings.
{
    seen[$0] = 1
    tally[$1]++
    tally[$2]++
}

END {
    for (word in tally) {
        total += tally[word]
        print word " - " tally[word]
    }
    print "\nTotal No -", total
}

要以此形式运行,请将其保存到文件 script.awk,然后执行:

$ awk -f script.awk file
aka - 1
bga - 2
aab - 1
abb - 2
bbb - 2

Total No - 8

答案 1 :(得分:3)

awk '
  # Credit a string once per distinct (column, string, third field)
  # combination. The column number is part of the key, so a string that
  # appears in both columns with the same third field is credited twice.
  {
    if (!seen["1:" $1 ":" $3]++) { uniq[$1]++; grand++ }
    if (!seen["2:" $2 ":" $3]++) { uniq[$2]++; grand++ }
  }
  # Print each string with its count, then the running grand total.
  END {
    for (word in uniq)
      print word, uniq[word]
    print "Total No -", grand
  }' input

输出:

bga 2
aab 1
bbb 2
aka 1
abb 2
Total No - 8

答案 2 :(得分:2)

 awk '
   # One "already seen" table per column, keyed by (string, third field):
   # a string is credited the first time it meets a given third field in
   # column 1, and again the first time it does so in column 2.
   !first_seen[$1, $3]++  { hits[$1]++ }
   !second_seen[$2, $3]++ { hits[$2]++ }
   # Print each string with its count and accumulate the overall total.
   END {
     for (word in hits) {
       print word, hits[word]
       total += hits[word]
     }
     print "Total -", total
   }' file

答案 3 :(得分:1)

这是一个有点长的命令,但它很容易理解:

# Stage 1: create one array key per distinct (third field, string, column)
#          triple and print the keys; awk joins the subscripts with SUBSEP
#          (default "\034", the same byte cut splits on below).
# Stage 2: keep only the string, i.e. the second SUBSEP-separated piece.
# Stage 3: count how many distinct triples each string produced.
# Stage 4: print "string - count" lines and the sum of the counts.
gawk '
  { combos[$3, $1, 1]; combos[$3, $2, 2] }
  END { for (key in combos) print key }
' input \
  | cut -d $'\x1c' -f 2 \
  | sort \
  | uniq -c \
  | awk -v OFS=' - ' '
      { total += $1; print $2, $1 }
      END { print "\nTotal No", total }
    '

aab - 1
abb - 2
aka - 1
bbb - 2
bga - 2

Total No - 8

答案 4 :(得分:0)

# Needs gawk: seen[string][third field] is an array of arrays.
#
# For every input line, credit the strings in columns 1 and 2 the first
# time each one is met together with a given third field.
# NOTE: the same seen[] table serves both columns, so a string that shows
# up in column 1 and in column 2 with an identical third field is credited
# only once (e.g. a line such as "bga bga 230-232" adds 1 for bga, not 2).
{
    if (!seen[$1][$3]++)
        total[$1]++
    if (!seen[$2][$3]++)
        total[$2]++
}
# Print each string with its count, then the sum of all counts.
END {
    for (item in total) {
        totalCount += total[item]
        print item, total[item]
    }
    print "\nTotal no - ", totalCount
}

输出:

aka 1
bga 1
aab 1
abb 2
bbb 2

Total no -  7