options ls=120;

filename foo url "http://www.uvm.edu/~abh/stat295/datasets/fev1mult.dat";

data fev1mult;
     infile foo;
     input patient basefev1 fev11h fev12h fev13h fev14h fev15h fev16h 
           fev17h fev18h drug $;
run;

proc print data=fev1mult;
title "Multivariate Arrangement";
run;

/*
	This data set is from Chapter 8 of SAS for Linear Models, 
	4th. ed. by Littell, Stroup, and Freund.  Chapter 8 deals 
	with repeated measures analyses and how data needs to be 
	arranged differently when using PROC GLM than when using 
	PROC MIXED.

	Data consist of 2 asthma drugs (a and c) with placebo (p) 
	administered randomly to 24 patients.  FEV1 (forced 
	expiratory volume in the first second, a measure of 
	respiratory ability) is measured before drug administration 
	and at hourly intervals for 8 hours.  Each hourly measurement 
	is a separate variable.  Each observation contains all data 
	for one drug-patient combination.  This allows us to make 
	computations in a DATA step and create new variables using 
	statistical functions (below).  This example also illustrates 
	how to refer to a list of consecutive variables without 
	explicitly naming them all.  
*/

data fev1multstats;
     set fev1mult;
     meanfev1 = mean(of fev11h -- fev18h);
	 stdfev1 = std(of fev11h -- fev18h);
	 minfev1 = min(of fev11h -- fev18h);
	 maxfev1 = max(of fev11h -- fev18h);
run;

proc print data=fev1multstats;
title2 "with descriptive statistics";
run;

/*
	Frequently, it is necessary to rearrange this type of multivariate 
	layout into a univariate form.  In this example, we want to 
	create 8 univariate observations from one multivariate observation, 
	where an observation consists of a single measurement for each 
	drug-patient-hour combination.  There are many ways to do this.  
	Four examples are given here.  The first (below) re-inputs the 
	raw data and uses the trailing-@ symbol to hold the pointer on 
	the same line until all fev1 measurements are read.  This DATA 
	step also contains an example of reading a value at the end of 
	the line, then skipping backward to read the fev1 values.  It 
	is necessary to read values for all common variables in the first 
	INPUT statement, otherwise when the first OUTPUT statement is 
	encountered, there may be missing values for some variables.
*/

data fev1uni1;
     infile foo;
     input patient basefev1 @88 drug $  @24 fev1 @;
           hour=1;
           output;
	 input fev1 @;
           hour=2;
           output;
	 input fev1 @;
           hour=3;
           output;
	 input fev1 @;
           hour=4;
           output;
	 input fev1 @;
           hour=5;
           output;
	 input fev1 @;
           hour=6;
           output;
	 input fev1 @;
           hour=7;
           output;
	 input fev1;
           hour=8;
           output;
run;

proc print data=fev1uni1;
title "Univariate Arrangement";
title2 "Re-input raw data into univariate form";
run;

/*
	Another way to do this is to reread the existing SAS data set 
	(with a SET statement) and rearrange the multivariate layout 
	into the univariate form as each observation is reread from 
	the existing data set.  This is not unlike rereading the raw 
	data, except no pointer controls are needed, as there is no 
	INPUT statement.  This method essentially consists of renaming 
	each hourly fev1 variable to a common name, creating an hour 
	variable, and outputting the result to the data set.  The 
	hourly fev1 variables are dropped from the new data set.
*/

data fev1uni2; 
     set fev1mult;
     drop fev11h fev12h fev13h fev14h fev15h fev16h fev17h fev18h;
     hour=1; 
        fev1=fev11h; 
        output;
     hour=2; 
        fev1=fev12h; 
        output;
     hour=3; 
        fev1=fev13h; 
        output;
     hour=4; 
        fev1=fev14h; 
        output;
     hour=5; 
        fev1=fev15h; 
        output;
     hour=6; 
        fev1=fev16h; 
        output;
     hour=7; 
        fev1=fev17h; 
        output;
     hour=8; 
        fev1=fev18h; 
        output;
run;

proc print data=fev1uni2;
title2 "Reorganize existing data set in multivariate form";
run;

/*
	If we want to be absolutely certain that the two data sets 
	created in univariate form above are the same, we can run 
	the COMPARE procedure and SAS will report any differences found.
*/

proc compare base=fev1uni1 compare=fev1uni2;
run;

/*
	The above example can get rather cumbersome if there are 
	many variables that need to be rearranged into a univariate 
	form.  One way to simplify this is to put the hourly variables 
	into an array and then process the elements of this array.  
	The ARRAY statement assigns the hourly variables into an array.  
	The DO loop repetitively processes the elements of this array 
	in the same way.  By using this method, we replace 24 statements 
	in the above DATA step with 5 statements.  
*/

data fev1uni3;
     set fev1mult;
     drop fev11h fev12h fev13h fev14h fev15h fev16h fev17h fev18h;

	 array f {8} fev11h -- fev18h;
	 do hour = 1 to 8;
	    fev1 = f{hour};
		output;
	 end;
run;

proc print data=fev1uni3;
title2 "Use ARRAY statement and DO loop to reorganize multivariate form";
run;

proc compare base=fev1uni1 compare=fev1uni3;
run;

/*
	Yet another way to rearrange the multivariate form into 
	the univariate form is by using the TRANSPOSE procedure.  
	This procedure does exactly what its name suggests; it 
	treats the data set like a matrix and transposes it according 
	to your instructions.  This effectively changes observations 
	into variables and/or variables into observations.  Transposition 
	can be done for subsets of the data ("sub-matrices") using a BY 
	statement.  The VAR statement names the variables to be changed 
	into observations.  The code below takes the 8 fev1 variables for 
	each drug-patient combination and turns them into a single 
	variable with 8 observations.
*/

proc sort data=fev1mult;
     by drug patient basefev1;
run;

proc transpose data=fev1mult out=fev1uni4;
     by drug patient basefev1;
	 var fev11h -- fev18h;
run;

proc print data=fev1uni4;
title2 "Use TRANSPOSE procedure to reorganize multivariate form";
run;

/*
	As we see, there are a couple of problems to fix after 
	transposing the data.  There is no "hour" variable in 
	the data and the variable that contains the fev1 values 
	is named "COL1".  This is the default name that the 
	transpose procedure gives to the resulting transposed 
	values.  There is also a variable called _NAME_ that 
	the procedure creates to hold the original names of the 
	variables that were transposed.  Because the names of 
	the original variables contained the hour when fev1 
	measurements were taken, we can use this variable to 
	extract values for the hour variable.
*/

data fev1uni4;
     set fev1uni4;
	 length hour 8 ;
     rename col1 = fev1;
	 hour = substr(_NAME_,5,1);
	 drop _NAME_;
run;

proc print data=fev1uni4;
title3 "May require ""cleaning up"" the data in a DATA step after the TRANSPOSE";
run;

proc compare base=fev1uni1 compare=fev1uni4;
run;

/*
	Our data does not contain the sex of each patient, but 
	we have that information in another raw data file.  The 
	code below reads the data containing sex for each patient 
	and merges it into the multivariate data arrangement.  
	The DATA step that performs the merge also has an example 
	of using the PUT function to create a new variable by 
	assigning the values of a SAS format to values of the new 
	variable.  We use the SEX format created in an earlier SAS 
	program (formats.sas) to assign new variable values "Male" 
	and "Female" to the values 1 and 2 from the numeric variable.  
	Note the LIBNAME statement below.  Since SAS will look for 
	existing formats in a directory having the library name 
	LIBRARY assigned to it, this is how SAS finds the format 
	for sex that was created several weeks ago.
*/

libname library "Z:\sasformats";
filename foo2 url "http://www.uvm.edu/~abh/stat295/datasets/fev1sex.dat";

data fev1sex;
	 infile foo2;
     input patient sex;
run;

proc sort data=fev1mult;
     by patient;
run;

data fev1multsex;
     merge fev1mult fev1sex;
	 by patient;
	 sexchar = put(sex,sexfmt.);
run;

proc print data=fev1multsex;
title "Multivariate arrangement with numeric and character SEX variables merged in";
run;