/*---------------------------------------------------------------------
  Merging two yearly tree-diameter files and comparing two ways of
  averaging the diameters.

  Each raw file is read with list input as: plot, tree, treatment
  (character), and a diameter (dbh) value. The two years are combined
  side by side with a match-merge on plot and tree.
---------------------------------------------------------------------*/

options ls=120;

/* Filerefs pointing at the two remote raw data files. */
filename foo2 url "http://www.uvm.edu/~abh/stat295/datasets/trees2002.dat";
filename foo4 url "http://www.uvm.edu/~abh/stat295/datasets/trees2004.dat";


/*---------------------------------------------------------------------
  Part 1: the diameter variables already carry year-specific names.
---------------------------------------------------------------------*/

/* 2002 measurements: read, then order by the merge keys. */
data trees02;
    infile foo2;
    input plot tree treatment $ dbh02;
run;

proc sort data=trees02;
    by plot tree;
run;

/* 2004 measurements: read, then order by the merge keys. */
data trees04;
    infile foo4;
    input plot tree treatment $ dbh04;
run;

proc sort data=trees04;
    by plot tree;
run;

/* Match-merge: one row per plot/tree, holding dbh02 and dbh04. */
data trees0204;
    merge trees02
          trees04;
    by plot tree;
run;

proc print data=trees0204;
    title "Tree diameters in 2002 and 2004";
run;


/*---------------------------------------------------------------------
  Part 2: what if the dbh variables were originally named with the
  same name in the two data sets? Each one can be renamed during the
  merge operation by using the RENAME= data set option, so neither
  value overwrites the other.
---------------------------------------------------------------------*/

/* Re-read both files, this time with the common name dbh. */
data trees02;
    infile foo2;
    input plot tree treatment $ dbh;
run;

proc sort data=trees02;
    by plot tree;
run;

data trees04;
    infile foo4;
    input plot tree treatment $ dbh;
run;

proc sort data=trees04;
    by plot tree;
run;

/* Rename dbh on the way in so each year keeps its own column. */
data trees0204;
    merge trees02(rename=(dbh=dbh02))
          trees04(rename=(dbh=dbh04));
    by plot tree;
run;

proc print data=trees0204;
    title "Tree diameters in 2002 and 2004";
run;


/*---------------------------------------------------------------------
  Part 3: calculate the mean of the two DBH values in two ways, once
  with the MEAN function and once with an algebraic equation, to see
  how each treats missing values.
---------------------------------------------------------------------*/

data trees0204;
    set trees0204;
    dbhmean1 = mean(dbh02, dbh04);   /* MEAN function        */
    dbhmean2 = (dbh02 + dbh04) / 2;  /* algebraic equivalent */
run;

proc print data=trees0204;
    title2 "Calculate mean DBH by two methods";
run;

/*
  Result: the MEAN function uses whatever non-missing data are
  available and puts the appropriate n in the denominator. The
  algebraic equation, by contrast, gives a missing result if any of
  the variables in the equation has a missing value.
*/