SAS: Create SAS Table from List of Strings

Normally I need to compile a unique list of text strings from SAS table and run report for each item in the list through a %do loop. See details in the previous post. In this case, I need to convert the list of strings back to SAS table using space as delimiter.

Solution

%let lst = apple orange grape carrot banana watermelon peach;
%macro createtbl;
data lst; 
length fruit $15.;
%let i = 1;
%do %while (%scan(&lst., &i., ' ') ne );
  fruit = "%scan(&lst., &i. , ' ')"; 
  output;
%let i = %eval(&i. +1);
%end; 
run;
%mend createtbl;
%createtbl;

Notes

  • Have to use macro with %macro and %mend because %do, %end, and %eval are not valid in open code. Macros with %macro and %mend are closed marcos.
  • Use single quotation ‘ ‘ for space and use double quotation ” ” for string. The other way will not work. In the above example, if I change the equation for fruit to ‘%scan(&lst., &i. , ” “)’, then the string %scan(&lst., &i., ” “) becomes the values in the fruit field without interpreting the macro variable in the equation and applying the scan function.
  • Need %let i = %eval(&i. +1) with the %do % while statement

Wrong Code Demo and Error Diagnosis

data lst; 
length fruit $15.;
do i =1 to 7;
  fruit = "%scan(&lst., i , ' ')"; 
  output;
end;
run;

Error Message

  • ERROR: A character operand was found in the %EVAL function or %IF condition where a numeric
  • operand is required. The condition was: i
  • ERROR: Argument 2 to macro function %SCAN is not a number.

Diagnosis

  • The second argument i is not a number. To solve this, have to use closed macro and %eval, so the value of i +1 can be calculated.

Other Methods that don’t work

  • Create empty table first and then add values by row.
  • Don’t know how to add values by row without using macro.
/* NOT USEFUL*/
data lst;
attrib 
    fruit length=$15 format=$15. label="fruit";
stop;
run;

SAS: Reading Census Data

Census data is exceptionally large. The 2016 census profile for Ontario is 4.5G and has more than 46,694,909 lines of records. To extract the data efficiently, StatsCan provides a csv file that identifies the starting row number for each geography. Using this file, you can compile the parameter list for the geographical area of interest at the selected geographic level, eg. province level, census division level, and census subdivision level.
Census file can be downloaded at link
Step 1: Compile parameter lists

%put &name.;
Canada Ontario Durham York Toronto Peel Halton
 %put &start.;
2 2249 7287023 9513800 12198965 20521853 25289987
 %put &end.;
2248 4495 7289269 9516046 12201211 20524099 25292233

Step 2: Extract and Compile data

%macro ext (namelst=, startlst=, endlst=);
proc datasets library=work noprint;
delete census;
quit;
%let i=1;
%do %while (%scan(&namelst., &i, ' ') ne );
%let parm1=%scan(&namelst., &i, ' ');
%let parm2=%scan(&startlst., &i, ' ');
%let parm3=%scan(&endlst., &i, ' ');
data census_&parm1.;
infile 'X:\Work\Stats Can\98-401-X2016044_ONTARIO_eng_CSV\98-401-X2016044_ONTARIO_English_CSV_data.csv'
delimiter = ',' firstobs=&parm2. obs=&parm3. TRUNCOVER  DSD LRECL=32767 ;
INFORMAT 
year 8.
geo_code  $13.
geo_level 8.
geo_name $80.
gnr 8.1
gnr_lf 8.1
quality_flag $5. 
alt_geo_code 8.
Item $50.
itemID 8.
Notes 8.
Total 8.
Male $8.
Female $8.
;
FORMAT 
year 8.
geo_code  $13.
geo_level 8.
geo_name $50.
gnr 8.1
gnr_lf 8.1
quality_flag $5. 
alt_geo_code 8.
Item $80.
itemID 8.
Notes 8.
Total 8.
Male $8.
Female $8.
;
input
year 
geo_code $
geo_level 
geo_name $
gnr
gnr_lf
quality_flag 
alt_geo_code 
Item $
itemID 
Notes 
Total
Male $
Female $
;
run;
proc append base = census data = census_&parm1.;
run;
%let i = %eval (&i +1);
%end;
%mend ext;
%ext (namelst=&name., startlst=&start., endlst=&end. );

The resulting dataset is only 4.1mb, which you can manipulate efficiently.

SAS: Batch Macro Variable Assignment by Class and Application in Dynamic Labeling

Set up “Class” table for Proc Tabulation.  “Class” table can force missing value in the table so the layout of the combination of class variables will not change due to 0 counts of the frequency of the variables.

data class;
do rank= 1, 2, 3;
do area = '0', '1';
do gender = 'F', 'M';
output;
end;
end;
end;
run;

Output Proc Tabulate summary to data set using “Class” layout. “Layout” data set contains frequency counts by rank by area and by gender.

proc tabulate  data = data out = layout missing classdata = class;
class rank area gender ;
var count;
table rank*(area="" all), (gender all)*count*(sum="");
run;

Add customary titles by rank by area in the “Layout” data set. Assign counts and titles to macro variables.

data layout;
set layout;
length varname $11 sum $2 title1 $60 title2 $60 vartitle1 $12 vartitle2 $12;
if count_sum = . then count_sum = 0;
sum = put(count_sum, 2.);
drop _TYPE_ _PAGE_ _TABLE_;
if gender = '' then gender = 'N';
if rank = 1 then title1 = "2017 Assistant Professors/Lecturers";
if rank=2 then title1 = "2017 Associate Professors/Lecturers";
if rank =3 then title1 = "2017 Fall Professors/Senior Lecturers";
if area = 0 then title2 = "General Arts"; 
if area = 1 then title2 = "Engineering";
if area = '' then do;
title2 = "All Areas of Specialization";
varname ="rank"||trim(left(rank_cd))||gender;
vartitle1 = "rank"||trim(left(rank_cd))||'t1';
vartitle2 = "rank"||trim(left(rank_cd))||'t2';
end;
else do;
varname = "rank"||trim(left(rank_cd))||"area"||trim(left(area))||gender;
vartitle1 = "rank"||trim(left(rank_cd))||"area"||trim(left(area))||'t1';
vartitle2 = "rank"||trim(left(rank_cd))||"area"||trim(left(area))||'t2';
end;
call symput(varname, sum);
call symput (vartitle1, title1);
call symput (vartitle2, title2);
run;
options symbolgen;

Application
Print the regression line by rank by area with corresponding titles and insert counts information of total observation and by gender on the respective chart. The “Inset” statement will translate the macro variables into format like N=75(46F/29M) and put into a text box inside of the axes of the plot.

ods pdf file="c:\test.pdf";
%macro plots;
%do j = 1 %to 3;
%do i = 0 %to 1;
title1 height=14pt "&&rank&j.area&i.t1";
title2 height=12pt "&&rank&j.area&i.t2";
proc sgplot data=glm_out (where=(rank=&j. and area = "&i.")) ;
scatter x=experience y=salary / group=gender grouporder=ascending  name='plot' markerattrs=(symbol=circlefilled) ;
series x=experience y=yhat / name='predict' legendlabel='ln(Predicted Sal)' lineattrs=(color=blue ) transparency = 0.5 ;
inset "N=&&rank&j.area&i.N (&&rank&j.area&i.f.F/&&rank&j.area&i.m.M)"  / position = bottomright  BORDER;
run;
quit;
%end;
%end;
%mend plots;
%plots
Ods pdf close;

SAS: Data Set Compilation through Remote Submit and Download

Sometime it is better to submit the block of codes to the server for execution when the data sets are large and multiple years of data needed to be  compiled.  The remote calculation can also free up the calculation capacity on your own PC, so you can work on something else while waiting for the server.

The following codes are used to:

  • loop through 5 reporting periods each year;
  • retrieve specific variables that meet the conditions;
  • compile data set for each year;
  • stack data sets for multiple years and generate final data set with all years records for download;
  • delete data set for each year in the workspace on the server

The caveats for remote submit are:

  • codes need to be thoroughly validated locally before submit to the server
  • use “nodetails” and “nolist” to omit output report

To remote submit the code, you first select the code block,  then right click and then select “Remote Submit…”.

%let varlist1 = var1 var2 var3 var4 var5 var6 var7 ;
proc datasets library = work nodetails nolist;
delete opsis;
run;
%macro opsis (strtyear, stopyear);
%do yr= &strtyear %to &stopyear;
%put &yr.;
%let nextyr = %eval(&yr.+1);
%let charyr = %substr(&yr.,3,2);
%let charnextyr = %substr(&nextyr.,3,2);
%put &charyr.;
DATA opsis_&charyr.;
SET opsis.uecJun&charyr. (keep=&varlist1.)
opsis.uecJul&charyr. (keep=&varlist1.)
opsis.uecNov&charyr. (keep=&varlist1.)
opsis.uecFeb&charnextyr. (keep=&varlist1.)
opsis.uecMar&charnextyr. (keep=&varlist1.);
where var1 ne 'XX' and var2 ne '' and var3= 'XXX';
proc append base=opsis data =opsis_&charyr.;
run;
%end;
%mend opsis;
%opsis (2009, 2017);
proc download data = opsis out=opsis;run;
proc datasets library=work nodetails;
delete opsis_09 opsis_10-opsis_17;
run;

Macro: Check variable length, name, type of multiple tables with similar layout

/* set up list for the tables requied to scan for  the variables */

%let tlst = a b c d e f g;

/* Method 1: use parameter */

/* Generate vairable list for each table */

%macro varlen (sec=);
%let i = 1;
%do %while (%scan(&sec., &i, ‘ ‘) ne );
%let tbl= %scan(&sec., &i, ‘ ‘);
proc contents data = section&tbl._ug out=ug&tbl. (keep= MEMNAME name type length) noprint;
proc contents data = section&tbl._gr out=gr&tbl. (keep= MEMNAME name type length) noprint;
%let i = %eval (&i +1);
%end;
run;
%mend varlen;
%varlen (sec = &tlst.);

/* Method 2: use macro variable list directly */

/* Append varaible lists to one table */
%macro comp ;
%let i = 1;
%do %while (%scan(&tlst., &i, ‘ ‘) ne );
%let tbl= %scan(&tlst., &i, ‘ ‘);
proc append base = uglen  data = ug&tbl.;
run;
%let i = %eval (&i +1);
%end;
%let i = 1;
%do %while (%scan(&tlst., &i, ‘ ‘) ne );
%let tbl= %scan(&tlst., &i, ‘ ‘);
proc append base = grlen  data = gr&tbl.;
run;
%let i = %eval (&i +1);
%end;
proc sort data = uglen;
by name memname;
proc sort data = grlen;
by name memname;
run;
%mend comp;
%comp ;

SAS: Set start and stop for macro variables

Sometimes I need to repeat the same exercise for multiple times, for example faculty want retention rate not just for one year but multiple year, it is easy to use %macro macroname (parameter1, parameter2) in conjunction with %do varname = &value1 &to &value2 to customize the year range in the do loop statement per clients request.

eg;

%macro step (strtyear, stopyear);
%do yr= &strtyear %to &stopyear;

….continue coding

%end;

%mend step;

%step (11, 14);

 

SAS: Convert Variable between Numeric and Character Format

A. Use Put or Input
Convert between numeric and character variable.

  • character to numeric (input)
old_char = "2018";
new_num = input(old_char, 8.);
new_num = 2018;
  • character to numeric (input) to character (put)
old_char = "2018";
new_char = put (input (substr(old_char , 1, 4 ), 8.) -1 , 4.);
new_char = "2017";

/* or */
new_char = put (old_char*1-1 , 4.);
new_char = "2017";
  • numeric to character (put), and with leading Zero.
old_num = 2018;
new_char = put(old_num, 4.); *new_char = "2018";
new_char1 = put(old_num, z8.); *new_char1 = "00002018";
  • numeric to character (put) to numeric (input)
old_num = 2018;
new_num = input(substr(put(old_num, 4.),3, 2) , 8.);
new_num = 18;

Use the following functions to check whether any digit or alphabetic character is in the character variable. The results will be the position of the first digit/alphabetic character in the field.

check1 = anydigit(var); *return position of first digit, 0 if not found in the string;
check2 = anyalpha(var); *return position of first alphabetic character, 0 if not found in the string;
check3 = notalpha(var); *return position of first non digit, 0 if not found in the string;
check4 = notdigit(var); *return position of first non alphabetic character, 0 if not found in the string;
  • Please note there should be no space in between % and statement eg. macro, let, mend, put, eval etc.
  • To apply the macro variable, use &varname. format.

CODE:

%macro setyr;
%let yr = 14;
%let nextyr =%eval(&yr. +1);  /* 15 */
%let next2yr=%eval(&yr. +2);  /* 16 */
%let fisyr = “20%substr(&yr., 1,2)/%substr(&nextyr., 1,2)”;   /* “2014/2015” */
%let fisnextyr = “20%substr(&nextyr., 1,2)/%substr(&next2yr., 1, 2)”;   /* “2015/2016” */
%let year = “20&yr.”; /* “2014” */
%put &yr;
%put &nextyr;
%put &next2yr;
%put &fisyr;
%put &fisnextyr;
%put &year;
%mend setyr;
%setyr;

OUTPUT:
14
15
16
“2014/15”
“2015/16”
“2014”

B. Use Vvalue fuction
Vvalue() returns the formatted value that is associate with the variable.

data want;
data have;
new_charvar1 = vvalue(formatted_numvar1);  *formatted means the variable has been applied with format;
new_numvar2= vvalue(formated_charvar2);
run;