按组标记最大值的指示变量

时间:2015-01-16 13:57:26

标签: sas

对于以下任务,是否有比下面提到的更优雅的方式:

创建指示变量(即下面代码中的“MAX_X1”和“MAX_X2”):在由“key1”定义的每个观测组内,如果某条观测的取值等于该变量在该组中的最大值,则指示变量取 1,否则取 0。

/* Build the demo data set HAVE: 10 groups (key1) of 5 rows (key2) each,
   carrying one uniform draw (x1) and one normal draw (x2) per row. */
data have;
  call streaminit(4321);              /* fixed seed: the draws are reproducible */
  do key1 = 1 to 10;
    do key2 = 1 to 5;
      x1 = rand("uniform");
      x2 = rand("Normal");
      output;                         /* one observation per key1/key2 pair */
    end;
  end;
run;

/* Per-key1 maxima of x1 and x2, written to WORK.MAX (nothing is printed).
   AUTONAME names the result columns x1_Max and x2_Max.
   BY processing expects HAVE to be ordered by key1. */
proc means data=have noprint;
  by key1;
  var x1 x2;
  output out=max max(x1 x2)= / autoname;
run;

/* Attach each group's maxima (from MAX) to every row of that group in HAVE.
   The underscore-prefixed bookkeeping columns are left out of the result. */
data want;
  merge have
        max (drop=_:);
  by key1;
run;

/* Collect the names of the "<var>_Max" columns of WORK.WANT into the macro
   variable MAXvars (space separated) for use in a later ARRAY statement.

   - The pattern is single-quoted so the macro processor does not try to
     resolve %_Max as a macro call.
   - "_" is a one-character wildcard in LIKE, so it is escaped to match a
     literal underscore.
   - Names are ordered by column position (varnum) rather than alphabetically,
     so the list keeps the data set's column order even when names such as
     x10_Max would sort before x2_Max. */
proc sql;
    title "MAX";
    select name into :MAXvars separated by ' '
        from dictionary.columns
        where libname = 'WORK'
          and memname = 'WANT'
          and name like '%^_Max' escape '^'
        order by varnum;
quit;
title;

/* Add 0/1 indicators MAX_X1 / MAX_X2 to WANT: 1 when the row's x value equals
   the group maximum stored in the matching column listed in &MAXvars.
   The arrays are matched by position, so &MAXvars must list the maxima in the
   same order as XVars. */
data want;
    set want;
    /* Not named MAX: an array that shares a name with a SAS function
       shadows that function for the whole step and triggers a log warning. */
    array GroupMax   (*) &MAXvars;
    array XVars      (*) x1 x2;
    array Indicators (*) MAX_X1 MAX_X2;
    do i = 1 to dim(GroupMax);
        /* a comparison evaluates to 1 (true) or 0 (false) */
        Indicators[i] = (XVars[i] = GroupMax[i]);
    end;
    drop i;
run;

感谢您提出任何优化建议

2 个答案:

答案 0 :(得分:2)

PROC SQL 可以配合 GROUP BY 子句使用,这样汇总函数(例如 max)会按组计算,其结果可以直接与组内每一行的取值进行比较。

    /* Sample input: key1 = 1..10, key2 = 1..5, with random x1 (uniform)
       and x2 (normal) on every row; seeded for repeatable results. */
    data have;
        call streaminit(4321);
        do key1 = 1 to 10;
            do key2 = 1 to 5;
                x1 = rand("uniform");
                x2 = rand("Normal");
                output;
            end;
        end;
    run;

    /* Build WANT from HAVE with two 0/1 flags per row: max_x1 / max_x2 are 1
       when the row's x1 / x2 equals the maximum within its key1 group.
       max() is evaluated per GROUP BY group and compared row by row. */
    proc sql;
        create table want as
        select key1,
               key2,
               x1,
               x2,
               (x1 = max(x1)) as max_x1,   /* comparison yields 1 or 0 */
               (x2 = max(x2)) as max_x2
        from have
        group by key1
        order by key1, key2;
    quit;

答案 1 :(得分:1)

如果您读取输入数据集两次,也可以在单个数据步骤中执行此操作 - 这是双DOW循环的示例。

/* Test data: 50 rows = 10 key1 groups x 5 key2 values.
   x1 is a uniform draw and x2 a normal draw; the seed makes them repeatable. */
data have;
    call streaminit(4321);
    do key1 = 1 to 10;
        do key2 = 1 to 5;
            x1 = rand("uniform");
            x2 = rand("Normal");
            output;
        end;
    end;
run;

/* Put HAVE in key1 order (in place). Not needed when the data set is
   already sorted, or indexed, by key1. */
proc sort data=have out=have;
  by key1;
run;

/* Flag, within each key1 group, the rows that hold the group maximum of x1 and
   of x2. HAVE is read twice inside one DATA step (a double DOW-loop): each
   iteration of the step handles one complete key1 group. */
data want;
    /* Never executed at run time; it only makes HAVE's variables known at
       compile time, before the ARRAY statement refers to x1 and x2. */
    if 0 then set have;
    /* Row 1 = input values, row 2 = 0/1 output flags, row 3 = running maxima.
       The t_ variables are not read from HAVE and are not retained, so they
       start as missing at the top of every step iteration (i.e. every group). */
    array xvars[3,2] x1 x2 x1_max_flag x2_max_flag t_x1_max t_x2_max;
    /* 1st DOW-loop: read every row of the current key1 group and keep the
       running maximum of each x variable. Nothing is output here.
       When the loop ends, _n_ holds the number of rows in the group. */
    do _n_ = 1 by 1 until(last.key1);
        set have;
        by  key1;
        do i = 1 to 2;
            /* max() ignores missing arguments, so the first row seeds the maximum */
            xvars[3,i] = max(xvars[1,i],xvars[3,i]);
        end;
    end;
    /* 2nd DOW-loop: this separate SET statement has its own read position, so
       it re-reads the same _n_ rows; each row is compared with the group
       maximum and written out. */
    do _n_ = 1 to _n_;
        set have;
        do i = 1 to 2;
            /* the comparison evaluates to 1 when the row holds the maximum, else 0 */
            xvars[2,i] = (xvars[1,i] = xvars[3,i]);
        end;
        output;
    end;
    /* Keep only the inputs and the flags: drop the loop index and the t_ work variables. */
    drop i t_:;
run;

理解这可能有点复杂,所以这里有一个粗略的解释:

  • 使用第一个DOW循环读入一个BY分组,每读入一行就更新滚动最大值变量。此时不输出任何内容。
  • 然后使用第二个DOW循环再次读取同一个分组,检查每一行的值是否等于滚动最大值,并输出每一行。
  • 回到第一个DOW循环,读入下一个分组,重复上述过程。