优化查找和替换脚本

时间:2013-11-18 11:19:29

标签: regex bash shell unix awk

我开发了一个解析文本的脚本,并在特定条件下替换文本并生成新文件。

我想知道是否可以对它进行优化,或者是否有其他替代方案的建议。

# Build the reconciliation CSV from every *inputparams.txt file in the current
# directory. Each step writes one file under $RECON_PATH:
#   tx_id.txt      - fields 1,2,3,4,5,8 of each "|"-separated record, with the
#                    numeric codes in fields 1-3 replaced by text labels
#   msisdn_payment - payment type and number taken from the 4th "["-delimited
#                    group of each record
#   package        - third ","-separated token of each record
#   final.txt      - the three files above pasted side by side with ","
#
# The inputs are passed to awk directly (no extra cat process). The glob is
# written ./*inputparams.txt so a file name can never be taken by awk as an
# option or as a var=value operand. Every expansion of RECON_PATH is quoted so
# a path containing spaces or glob characters is not split.

# Step 1: code-to-label translation.
# Rules run top to bottom and each later test sees earlier replacements. Once a
# code has been replaced by a label it no longer equals any number, so at most
# one label is applied per field; the rule order below is therefore significant.
awk -F'|' '
  BEGIN { OFS = "," }

  # Field 3: label selected by the (field 2, field 3) code pair.
  $2 == 1 && $3 == 1  { $3 = "Subscription Creation without Previous" }
  $2 == 1 && $3 == 5  { $3 = "Offer Upgrade" }
  $2 == 1 && $3 == 6  { $3 = "Offer Downgrade" }
  $2 == 1 && $3 == 7  { $3 = "Campaign Extend" }
  $2 == 1 && $3 == 8  { $3 = "Campaign Change" }
  $2 == 1 && $3 == 27 { $3 = "Subscription Update" }
  $2 == 2 && $3 == 2  { $3 = "Charging Renewal" }
  $2 == 2 && $3 == 3  { $3 = "Subscription Reactivation" }
  $2 == 2 && $3 == 4  { $3 = "Subscription Reactivation with Recharge Monitoring" }
  $2 == 2 && $3 == 8  { $3 = "Campaign Change" }
  $2 == 2 && $3 == 30 { $3 = "Limited Service" }
  $2 == 3 && $3 == 11 { $3 = "Cancellation" }
  $2 == 3 && $3 == 17 { $3 = "Subscriber Account Reactivation" }
  $2 == 4 && $3 == 11 { $3 = "Cancellation" }
  $2 == 5 && $3 == 11 { $3 = "Cancellation" }
  $2 == 5 && $3 == 12 { $3 = "Expiration" }
  $2 == 5 && $3 == 13 { $3 = "Inactivation due to Charging" }
  $2 == 5 && $3 == 14 { $3 = "Inactivation due to Ceased Account" }
  $2 == 5 && $3 == 15 { $3 = "Inactivation due to Payment Method Change" }
  $2 == 5 && $3 == 16 { $3 = "Inactivation due to Ownership Change" }
  $2 == 5 && $3 == 18 { $3 = "Inactivation due to Offer Upgrade" }
  $2 == 5 && $3 == 19 { $3 = "Inactivation due to Offer Downgrade" }
  $2 == 6 && $3 == 9  { $3 = "Campaign Schedule" }
  $2 == 6 && $3 == 10 { $3 = "Offer Schedule" }

  # Field 2: label selected by the (field 1, field 2) code pair.
  $1 == 5 && $2 == 2  { $2 = "RENEWAL" }
  $1 == 4 && $2 == 2  { $2 = "SUBS. CREATE RENEWAL AOC" }
  $1 == 6 && $2 == 3  { $2 = "REFUND" }
  $1 == 4 && $2 == 5  { $2 = "INACTIVATION" }
  $1 == 5 && $2 == 4  { $2 = "PENALTY" }

  # Field 1: label selected by its own code.
  $1 == 1             { $1 = "RESERVE" }
  $1 == 2             { $1 = "COMMIT" }
  $1 == 3             { $1 = "ROLLBACK" }

  # The specific rule must stay before the generic one: the generic rule only
  # fires when field 2 is still the number 1.
  $1 == 5 && $2 == 1  { $2 = "SUBS. CREATE DIRECT DEBIT" }
  $2 == 1             { $2 = "CREATION" }

  { print $1, $2, $3, $4, $5, $8 }
' ./*inputparams.txt > "$RECON_PATH/tx_id.txt"

# Step 2: take the text between the 4th "[" and the next "]", then print its
# second comma token (1 -> POSTPAID, 2 -> PREPAID, anything else unchanged)
# followed by its first comma token.
awk -F'[' '{ print $4 }' ./*inputparams.txt \
  | awk -F']' '{ print $1 }' \
  | awk -F',' 'BEGIN { OFS = "," } $2 == 1 { $2 = "POSTPAID" } $2 == 2 { $2 = "PREPAID" } { print $2, $1 }' \
  > "$RECON_PATH/msisdn_payment"

# Step 3: third comma-separated token of every record.
awk -F',' '{ print $3 }' ./*inputparams.txt > "$RECON_PATH/package"

# Step 4: join the three per-record files line by line.
paste -d',' \
  "$RECON_PATH/tx_id.txt" \
  "$RECON_PATH/package" \
  "$RECON_PATH/msisdn_payment" \
  > "$RECON_PATH/final.txt"

以下是样本记录

5|2|3|rfe-29883066|9840311190936312183|2.0|49.0|20131119093631|[[],4900671,SOCIAL_DATA,null,SOCIAL DATA,20130710000000,,,[971508592346,2],null,7012183,20130926190549,[[{LIMITED_PERIOD_END_DATE=20131110093613}{INITIAL_CHARGED_AMOUNT=49.0}{INITIAL_CHARGE_OPTION=1}{ENE_EVENT_ID=24645862}{FULFILL_ON_RESERVE=0}],false,false,null,4900672,SOCIAL DATA,20130710000000,2,20131119093631,0,2,[111111111111,2],USSD,2592000000,[{[{PACKAGE_ID_ONE_TIME_NORMAL_QOS=2000002935}{PREFERRED_PACKAGE_ID=PACKAGE_ID_AUTO_RN}{PACKAGE_ID_ONE_TIME_WITH_THROTTLING=2000002935}{PACKAGE_ID_AUTO_RN=2000002881}{PROVISIONED_PACK_ID=2000002935}{PROVISIONED_PACK_TYPE=PACKAGE_ID_ONE_TIME_WITH_THROTTLING}],DATAN_SOCIAL_DATA,4900667,DATAN,DATAN,[]}{[{PACKAGE_ID_ONE_TIME_NORMAL_QOS=2000002922}{PREFERRED_PACKAGE_ID=PACKAGE_ID_AUTO_RN}{PACKAGE_ID_ONE_TIME_WITH_THROTTLING=2000002922}{PACKAGE_ID_AUTO_RN=2000002880}{PROVISIONED_PACK_ID=2000002922}{PROVISIONED_PACK_TYPE=PACKAGE_ID_ONE_TIME_WITH_THROTTLING}],DATAN_DATA_SOCIAL,4900669,DATAN,DATAN,[]}{[{PACKAGE_ID_ONE_TIME_NORMAL_QOS=2000003031}{PREFERRED_PACKAGE_ID=PACKAGE_ID_AUTO_RN}{PACKAGE_ID_ONE_TIME_WITH_THROTTLING=2000003031}{PACKAGE_ID_AUTO_RN=2000003030}],DATAN_SOCIAL_THROTT,5400425,DATAN,DATAN,[]}{[{RATE_PLAN_ID=629120}],MKTWSSOCIALDATA,4900665,CMN,CMN,[]}],2,null,6912967,20130926190549]]

此致

2 个答案:

答案 0 :(得分:1)

这就是我的想法。将脚本放入文件并使其可执行,这样就可以像下面这样运行:

# Run the executable awk script over every file in the current directory whose
# name ends in "inputparams.txt" (the shell expands the glob before the call).
script.awk *inputparams.txt

脚本:

#!/usr/bin/awk -f

# Set the field separators and fill the global lookup table arr.
#
# Every key has the form "<field>==<value> <field>==<value>", i.e. two
# "field==value" parts joined by a single space, which is the string that
# makeLongKey() builds from a record. The part "0==0" is a placeholder used
# when only one real field decides the label. The value stored under a key is
# the text that replaces the numeric code in the output.
BEGIN {
    # input records are "|"-separated; output columns are joined with ","
    FS="|"
    OFS="," 

    # Labels for $3, selected by the ($2, $3) code pair
    arr[ "2==1 3==1" ] = "Subscription Creation without Previous"
    arr[ "2==1 3==5" ] = "Offer Upgrade"
    arr[ "2==1 3==6" ] = "Offer Downgrade"
    arr[ "2==1 3==7" ] = "Campaign Extend"
    arr[ "2==1 3==8" ] = "Campaign Change"
    arr[ "2==1 3==27" ] = "Subscription Update"
    arr[ "2==2 3==2" ] = "Charging Renewal"
    arr[ "2==2 3==3" ] = "Subscription Reactivation"
    arr[ "2==2 3==4" ] = "Subscription Reactivation with Recharge Monitoring"
    arr[ "2==2 3==8" ] = "Campaign Change"
    arr[ "2==2 3==30" ] = "Limited Service"
    arr[ "2==3 3==11" ] = "Cancellation"
    arr[ "2==3 3==17" ] = "Subscriber Account Reactivation"
    arr[ "2==4 3==11" ] = "Cancellation"
    arr[ "2==5 3==11" ] = "Cancellation"
    arr[ "2==5 3==12" ] = "Expiration"
    arr[ "2==5 3==13" ] = "Inactivation due to Charging"
    arr[ "2==5 3==14" ] = "Inactivation due to Ceased Account"
    arr[ "2==5 3==15" ] = "Inactivation due to Payment Method Change"
    arr[ "2==5 3==16" ] = "Inactivation due to Ownership Change"
    arr[ "2==5 3==18" ] = "Inactivation due to Offer Upgrade"
    arr[ "2==5 3==19" ] = "Inactivation due to Offer Downgrade"
    arr[ "2==6 3==9" ] = "Campaign Schedule" 
    arr[ "2==6 3==10" ] = "Offer Schedule"

    # Labels for $2, selected by the ($1, $2) code pair
    arr[ "1==5 2==2" ] = "RENEWAL"
    arr[ "1==4 2==2" ] = "SUBS. CREATE RENEWAL AOC"
    arr[ "1==6 2==3" ] = "REFUND"
    arr[ "1==4 2==5" ] = "INACTIVATION"
    arr[ "1==5 2==4" ] = "PENALTY"
    arr[ "1==5 2==1" ] = "SUBS. CREATE DIRECT DEBIT"
    arr[ "0==0 2==1" ] = "CREATION" # fallback for $2==1 when no ($1, $2) pair above matches; reached through getStringWithDefault()

    # Labels for $1, selected by its own code (first key part is the placeholder)
    arr[ "0==0 1==1" ] = "RESERVE"
    arr[ "0==0 1==2" ] = "COMMIT"
    arr[ "0==0 1==3" ] = "ROLLBACK"

    # Labels for the 10th comma-separated piece of $9. The "9==10" prefix is
    # not a real field==value test; it only keeps these keys apart from the
    # "2==..." keys above. field9Strings() builds the same prefix by hand.
    arr[ "9==10 2==1" ] = "POSTPAID"
    arr[ "9==10 2==2" ] = "PREPAID"
}

# Main rule, run for every input record: print one comma-joined output row.
# Columns 1-3 are fields 1-3 with their codes translated through arr (the raw
# field is kept when no label exists), columns 4-6 are fields 4, 5 and 8
# unchanged, and the remaining columns come from field9Strings().
{
    col1 = getString( 0, 1 )
    col2 = getStringWithDefault( 1, 2 )
    col3 = getString( 2, 3 )
    print col1, col2, col3, $4, $5, $8, field9Strings()
}

# Build one "field==value" key part, e.g. makeShortKey( 2, 1 ) gives "2==1".
#   field - field number (or placeholder) that goes before the "=="
#   value - value that goes after the "=="; spaces in it are removed first so
#           a padded field such as " 1" still yields the key part "2==1"
# Returns the key part as a string. value is a scalar parameter, so editing it
# here does not change the caller's variable or the input field.
#
# The previous call was gsub( value, "", " " ): the value was used as the
# regular expression and the target was a string literal, so the value itself
# was never stripped.
function makeShortKey( field, value ) {
    gsub( / /, "", value )
    return field "==" value
}

# Build a two-part lookup key "f1==v1 f2==v2" (the parts are joined by exactly
# one space). Each part is produced by makeShortKey().
#   f1, v1 - field number and value for the first part
#   f2, v2 - field number and value for the second part
# first and second are locals declared through the extra-parameter idiom.
function makeLongKey( f1, v1, f2, v2,    first, second ) {
    first = makeShortKey( f1, v1 )
    second = makeShortKey( f2, v2 )
    return first " " second
}

# Label for field b of the current record, preferring the entry keyed on the
# (a, b) field pair and falling back to the entry keyed on b alone.
#   a - number of the qualifying field (0 means "no qualifying field")
#   b - number of the field whose code is being translated
# Returns the pair-specific label if arr has one; otherwise whatever
# getString( 0, b ) returns, i.e. the "0==0 b==<value>" label or, failing
# that, the raw field $b.
#
# The lookup is done here explicitly and kept in a local. The earlier version
# decided on the fallback by reading the global s that getString() happens to
# leave behind, which only worked as long as getString() kept using a global.
function getStringWithDefault( a, b,    label ) {
    label = arr[ makeLongKey( a, a == 0 ? 0 : $a, b, $b ) ]
    if( label != "" ) return label
    return getString( 0, b )
}

# Translate field number field2 of the current record through arr.
#   field1 - number of the qualifying field; 0 is special and stands for the
#            placeholder part "0==0", so keys with one real field look the
#            same as keys with two
#   field2 - number of the field whose code is being translated
# Returns the label stored under "field1==<v1> field2==<v2>", or the raw
# field $field2 when arr has no non-empty entry for that key.
# NOTE: s is a global on purpose, not an extra-parameter local. Check every
# reader of s in the file before changing that.
function getString( field1, field2 ) {
    if( field1 == 0 )
        s = arr[ makeLongKey( field1, 0, field2, $field2 ) ]
    else
        s = arr[ makeLongKey( field1, $field1, field2, $field2 ) ]
    if( s != "" ) return s
    return $field2
}

# Build the trailing output columns from field 9 of the current record.
# $9 is split on "," into the global array carr. The first "[" is removed
# from piece 9 and the first "]" from piece 10. Piece 10 is then translated
# through the "9==10 2==<value>" entries of arr and kept as is when no label
# exists.
# Returns three values joined with OFS: piece 3, the translated piece 10, and
# piece 9. (Presumably package name, payment type and subscriber number;
# confirm against the input format.)
# carr and s9 remain globals; shown is a local.
function field9Strings(    shown ) {
    split( $9, carr, "," )
    sub( /\[/, "", carr[ 9 ] )
    sub( /]/, "", carr[ 10 ] )
    s9 = arr[ "9==10 " makeShortKey( 2, carr[ 10 ] ) ]
    if( s9 == "" )
        shown = carr[ 10 ]
    else
        shown = s9
    return carr[ 3 ] OFS shown OFS carr[ 9 ]
}

我选择使用 arr,是因为我认为这样能让这些字符串更易于维护。我尽量选用了这样一种键的格式:在以后的版本中,这些字符串可以改为从单独的字符串文件中读取。

字段9解析的键现在是“硬编码”。

对于 arr[ "0==0 2==1" ],我最终添加了一个特殊的 getStringWithDefault() 函数。如果“CREATION”其实取决于第 1 列中的值,则可以更新对应的键并删除该函数。

答案 1 :(得分:0)

你可以尝试

# Run parse.awk with "|" as the input field separator over every file in the
# current directory whose name ends in "inputparams.txt".
awk -F '|' -f parse.awk *inputparams.txt

其中parse.awk是:

# Output columns are joined with ",". The input separator is expected to be
# supplied by the caller (e.g. awk -F"|").
BEGIN {
    OFS = ","
}

# Main rule, run for every input record: print fields 1,2,3,4,5,8 (with codes
# translated by getFields()), then the third comma token of the record, then
# the payment type and the first token of the 4th "["-delimited group.
{
    # sets the globals f1, f2, f3, f4, f5 and f8 for this record
    getFields()

    # text between the 4th "[" and the following "]", split on ","
    split($0, a, "[")
    split(a[4], b, "]")
    split(b[1], c, ",")

    # Payment type: 1 and 2 get a label, any other value is printed as is.
    # The final else matters: without it msi kept the previous record's text
    # and each unknown code appended to it instead of starting fresh.
    if (c[2] == 1) msi = "POSTPAID"
    else if (c[2] == 2) msi = "PREPAID"
    else msi = c[2]
    msi = msi "," c[1]

    # third comma-separated token of the whole record
    split($0, d, ",")
    pack = d[3]

    print f1, f2, f3, f4, f5, f8, pack, msi
}

# Copy fields 1, 2, 3, 4, 5 and 8 of the current record into the globals
# f1, f2, f3, f4, f5 and f8, replacing the numeric codes in the first three
# with text labels where a label is defined. Fields without a matching label
# are copied unchanged. All tests read the untouched input fields ($1..$3),
# never the f* copies. Takes no arguments and returns nothing.
function getFields() {
    f1 = $1; f2 = $2; f3 = $3; f4 = $4; f5 = $5; f8 = $8

    # f3: label selected by the ($2, $3) code pair
    if ($2 == 1) {
        if      ($3 == 1)  f3 = "Subscription Creation without Previous"
        else if ($3 == 5)  f3 = "Offer Upgrade"
        else if ($3 == 6)  f3 = "Offer Downgrade"
        else if ($3 == 7)  f3 = "Campaign Extend"
        else if ($3 == 8)  f3 = "Campaign Change"
        else if ($3 == 27) f3 = "Subscription Update"
    } else if ($2 == 2) {
        if      ($3 == 2)  f3 = "Charging Renewal"
        else if ($3 == 3)  f3 = "Subscription Reactivation"
        else if ($3 == 4)  f3 = "Subscription Reactivation with Recharge Monitoring"
        else if ($3 == 8)  f3 = "Campaign Change"
        else if ($3 == 30) f3 = "Limited Service"
    } else if ($2 == 3) {
        if      ($3 == 11) f3 = "Cancellation"
        else if ($3 == 17) f3 = "Subscriber Account Reactivation"
    } else if ($2 == 4) {
        if      ($3 == 11) f3 = "Cancellation"
    } else if ($2 == 5) {
        if      ($3 == 11) f3 = "Cancellation"
        else if ($3 == 12) f3 = "Expiration"
        else if ($3 == 13) f3 = "Inactivation due to Charging"
        else if ($3 == 14) f3 = "Inactivation due to Ceased Account"
        else if ($3 == 15) f3 = "Inactivation due to Payment Method Change"
        else if ($3 == 16) f3 = "Inactivation due to Ownership Change"
        else if ($3 == 18) f3 = "Inactivation due to Offer Upgrade"
        else if ($3 == 19) f3 = "Inactivation due to Offer Downgrade"
    } else if ($2 == 6) {
        if      ($3 == 9)  f3 = "Campaign Schedule"
        else if ($3 == 10) f3 = "Offer Schedule"
    }

    # f2: label selected by the ($1, $2) code pair. The generic $2==1 case is
    # the last branch of the chain so it applies only when no specific pair
    # matched. As a separate unconditional "if" it always replaced
    # "SUBS. CREATE DIRECT DEBIT" with "CREATION".
    if      ($1 == 5 && $2 == 2) f2 = "RENEWAL"
    else if ($1 == 4 && $2 == 2) f2 = "SUBS. CREATE RENEWAL AOC"
    else if ($1 == 6 && $2 == 3) f2 = "REFUND"
    else if ($1 == 4 && $2 == 5) f2 = "INACTIVATION"
    else if ($1 == 5 && $2 == 4) f2 = "PENALTY"
    else if ($1 == 5 && $2 == 1) f2 = "SUBS. CREATE DIRECT DEBIT"
    else if ($2 == 1)            f2 = "CREATION"

    # f1: label selected by its own code
    if      ($1 == 1) f1 = "RESERVE"
    else if ($1 == 2) f1 = "COMMIT"
    else if ($1 == 3) f1 = "ROLLBACK"
}
相关问题