options ls=90;

filename foo url "http://www.uvm.edu/~abh/stat295/datasets/pledges.dat";

/*
   Charity-walk data: one record per walker, holding the amount of money
   pledged to that person and the number of miles the person walked.
   Walkers are classified by sex and by one of three age groups.
*/
data pledges;
    infile foo;
    input id sex $ agegroup pledged miles;
    label pledged = "Amount pledged"
          miles   = "Miles walked";
run;

/* Order rows so that, inside each sex/agegroup cell, the largest pledge comes last. */
proc sort data=pledges;
    by sex agegroup pledged;
run;

proc print data=pledges;
    title "Amounts pledged and miles walked";
    format pledged dollar6.;
run;

/*
   BY-group processing in a data step.

   A BY statement in a data step makes SAS create two automatic variables
   for every BY variable: FIRST.variable and LAST.variable.  Each is a 0/1
   flag telling you whether the current observation is the first or the
   last one of its BY group, so you can test the flag and act on it.
   The END= option of the SET statement creates a similar 0/1 flag that
   marks the last observation read by the data step.

   Because the data are sorted by pledged within sex and agegroup, keeping
   only the row where LAST.agegroup is true keeps the person with the
   highest pledge in each sex/agegroup combination.
*/
data high_pledge;
    set pledges;
    by sex agegroup;
    /* Subsetting IF: rows that are not last in their agegroup are dropped. */
    if last.agegroup;
run;

proc print data=high_pledge;
    title "Highest pledged amounts for each sex and agegroup";
    var id sex agegroup pledged;
    format pledged dollar5.;
run;

/*
   Same technique for miles walked: re-sort so the largest miles value is
   last within each sex/agegroup cell, then keep that last row.
*/
proc sort data=pledges;
    by sex agegroup miles;
run;

data high_miles;
    set pledges;
    by sex agegroup;
    /* The only OUTPUT in the step, so only the last row of each cell is written. */
    if last.agegroup then output;
run;

proc print data=high_miles;
    title "Highest miles walked for each sex and agegroup";
    var id sex agegroup miles;
run;

/*
   Running totals per sex and for the whole file, written to two data sets.

   - END= creates a temporary flag that is 1 only on the last observation
     read, which is when the grand totals are written.
   - FIRST.sex / LAST.sex mark the edges of each sex group, which is when
     the per-sex totals are reset and written.
   - The sum statement (variable + expression;) has no leading keyword.
     The accumulator is implicitly retained across observations and
     starts at 0.  An assignment statement (variable = expression;) is
     the other statement form that starts with a variable name.
*/
data group_totals (keep=sex pledge_total miles_total)
     all_totals   (keep=all_pledges all_miles);
    set pledges end=is_final_row;
    by sex;

    /* Retained accumulators must be zeroed when a new sex group begins. */
    if first.sex then do;
        pledge_total = 0;
        miles_total  = 0;
    end;

    /* Pledge accumulators: per-sex, then overall. */
    pledge_total + pledged;
    all_pledges  + pledged;

    /* Miles accumulators: per-sex, then overall. */
    miles_total + miles;
    all_miles   + miles;

    if last.sex then output group_totals;
    if is_final_row then output all_totals;
run;

proc print data=group_totals;
    title "Total pledges and miles walked by sex";
    format pledge_total dollar7.;
run;

proc print data=all_totals;
    title "Total pledges and miles overall";
    format all_pledges dollar7.;
run;

/*
   FIRST. and LAST. are also handy when a file holds several observations
   per subject and only one is wanted.  Example: a file of patient visits
   in which many patients appear more than once, where the analysis should
   use only the most recent visit.  Sort by patient ID and date of visit,
   then keep the row flagged by LAST. for each patient.
*/
filename visit url "http://www.uvm.edu/~abh/stat295/datasets/patients.dat";

data visits;
    infile visit;
    input MRN
          visit_date : mmddyy10.
          Sex $
          Age
          Live_alone
          Hypertension
          Diabetes
          COPD;
run;

proc sort data=visits;
    by mrn visit_date;
run;

proc print data=visits;
    title "All patient data";
    format visit_date date9.;
run;

data recent_visits;
    set visits;
    by mrn;
    /* Discard every visit except the last (latest-dated) one per MRN. */
    if not last.mrn then delete;
run;

proc print data=recent_visits;
    title "Only recent visits";
    format visit_date date9.;
run;

/*
   The END=, FIRST. and LAST. flags are temporary: none of them is written
   to the output data set.  To inspect their values, copy each one into an
   ordinary variable, which is saved and can be printed.
*/
data pledges;
    set pledges end=at_end;
    by sex agegroup;
    end_of_file = at_end;
    first_sex   = first.sex;
    last_sex    = last.sex;
    first_age   = first.agegroup;
    last_age    = last.agegroup;
run;

proc print data=pledges;
    title "Values of END, FIRST. and LAST. variables";
run;