SAS: Batch Macro Variable Assignment by Class and Application in Dynamic Labeling

Set up “Class” table for Proc Tabulation.  “Class” table can force missing value in the table so the layout of the combination of class variables will not change due to 0 counts of the frequency of the variables.

data class;
do rank= 1, 2, 3;
do area = '0', '1';
do gender = 'F', 'M';
output;
end;
end;
end;
run;

Output Proc Tabulate summary to data set using “Class” layout. “Layout” data set contains frequency counts by rank by area and by gender.

proc tabulate  data = data out = layout missing classdata = class;
class rank area gender ;
var count;
table rank*(area="" all), (gender all)*count*(sum="");
run;

Add customary titles by rank by area in the “Layout” data set. Assign counts and titles to macro variables.

data layout;
set layout;
length varname $11 sum $2 title1 $60 title2 $60 vartitle1 $12 vartitle2 $12;
if count_sum = . then count_sum = 0;
sum = put(count_sum, 2.);
drop _TYPE_ _PAGE_ _TABLE_;
if gender = '' then gender = 'N';
if rank = 1 then title1 = "2017 Assistant Professors/Lecturers";
if rank=2 then title1 = "2017 Associate Professors/Lecturers";
if rank =3 then title1 = "2017 Fall Professors/Senior Lecturers";
if area = 0 then title2 = "General Arts"; 
if area = 1 then title2 = "Engineering";
if area = '' then do;
title2 = "All Areas of Specialization";
varname ="rank"||trim(left(rank_cd))||gender;
vartitle1 = "rank"||trim(left(rank_cd))||'t1';
vartitle2 = "rank"||trim(left(rank_cd))||'t2';
end;
else do;
varname = "rank"||trim(left(rank_cd))||"area"||trim(left(area))||gender;
vartitle1 = "rank"||trim(left(rank_cd))||"area"||trim(left(area))||'t1';
vartitle2 = "rank"||trim(left(rank_cd))||"area"||trim(left(area))||'t2';
end;
call symput(varname, sum);
call symput (vartitle1, title1);
call symput (vartitle2, title2);
run;
options symbolgen;

Application
Print the regression line by rank by area with corresponding titles and insert counts information of total observation and by gender on the respective chart. The “Inset” statement will translate the macro variables into format like N=75(46F/29M) and put into a text box inside of the axes of the plot.

ods pdf file="c:\test.pdf";
%macro plots;
%do j = 1 %to 3;
%do i = 0 %to 1;
title1 height=14pt "&&rank&j.area&i.t1";
title2 height=12pt "&&rank&j.area&i.t2";
proc sgplot data=glm_out (where=(rank=&j. and area = "&i.")) ;
scatter x=experience y=salary / group=gender grouporder=ascending  name='plot' markerattrs=(symbol=circlefilled) ;
series x=experience y=yhat / name='predict' legendlabel='ln(Predicted Sal)' lineattrs=(color=blue ) transparency = 0.5 ;
inset "N=&&rank&j.area&i.N (&&rank&j.area&i.f.F/&&rank&j.area&i.m.M)"  / position = bottomright  BORDER;
run;
quit;
%end;
%end;
%mend plots;
%plots
Ods pdf close;

SAS: Plot Regression Line with 2 Standard Deviation

Get GLM output in data set

proc glm data=data noprint;
class rank_cd area;
model salary= Experience rank area / predicted cli; * regress SALARY against the 3 predictor variables ;
output out = glm_out Predicted = yhat R=resid lcl=lcl lclm=lclm ucl=ucl uclm=uclm rstudent=rstd student=stu dffits = infl stdr =error;
run;

Calcuate sample standard deviation using GLM output data set

proc univariate data=glm_out;
var resid;
output out = univar_out STD = sample_std_devn;
run;

Assign sample standard deviation to macro variable

data std;
set univar_out;
call symput('sstd',sample_std_devn);
run;
%put &sstd.;

Update GLM output data set with 2 std information

data glm_out;
set glm_out;
ustd_2 = yhat + 2*&sstd.;
lstd_2 = yhat - 2*&sstd.;
run;

Plot regression line with 2 standard deviation lines (upper/lower)

proc sgplot data=glm_out (where = (rank = 1 and area =1);
scatter x=experience y=salary / group=gender grouporder=ascending name='plot' markerattrs=(symbol=circlefilled) ;
series x=experience y=yhat / name='predict' legendlabel='ln(Predicted Sal)' lineattrs=(color=blue ) transparency = 0.5 ;
series x=experience y=ustd_2 / name='upper' legendlabel='2 Standard Deviation' lineattrs=(color = lightblue) transparency = 0.5;
series x=experience y=lstd_2 / name='lower' legendlabel='2 Standard Deviation' lineattrs=(color = lightblue) transparency = 0.5;
run;
quit;