data reg1; input x1 x2 x3 y; cards; 3.05 1.45 5.67 0.34 4.22 1.35 4.86 0.11 3.34 0.26 4.19 0.38 3.77 0.23 4.42 0.68 3.52 1.10 3.17 0.18 3.54 0.76 2.76 0.0 3.74 1.59 3.81 0.08 3.78 0.39 3.23 0.11 2.92 0.39 5.44 1.53 3.10 0.64 6.16 0.77 2.86 0.82 5.48 1.17 2.78 0.64 4.62 1.01 2.22 0.85 4.49 0.89 2.67 0.90 5.59 1.40 3.12 0.92 5.86 1.05 3.03 0.97 6.60 1.15 2.45 0.18 4.51 1.49 4.12 0.62 5.31 0.51 4.61 0.51 5.16 0.18 3.94 0.45 4.45 0.34 4.12 1.79 6.17 0.36 2.93 0.25 3.38 0.89 2.66 0.31 3.51 0.91 3.17 0.20 3.08 0.92 2.79 0.24 3.98 1.35 2.61 0.20 3.64 1.33 3.74 2.27 6.50 0.23 3.13 1.48 4.28 0.26 3.49 0.25 4.71 0.73 2.94 2.22 4.58 0.23 ; proc glm data=reg1; /* Using PROC GLM (General Linear Model) */ /* note xpx, i and solution provide us with X transpose X, its inverse and the solutions */ model y = x1 x2 x3/xpx i solution; /* compute and output fitted/predicted values (p=yhat), residuals (r=ehat), standard errors of the predicted values (stdp=se) and 95% upper and lower confidence limits of our fitted values (u95m=ul and l95m=ll) */ output out=reg1p p=yhat r=ehat stdp=se u95m=ul l95m=ll; run; quit; /* Exit PROC GLM */ /* print out the new data set reg1p */ proc print data=reg1p; title 'Observations, fitted values, residuals etc'; run; /* plot using gplot */ proc gplot data=reg1p; title ' plot Y vs X1 '; plot y*x1; run; proc gplot data=reg1p; title ' Plot Residuals vs X1 '; plot ehat*X1; run; ods rtf body='c:\temp\imlreg1.rtf'; proc iml; reset print; /* Reset print flag so all matrices are echoed */ /* Defining X and Y explicitly */ x = { 1 3.05 1.45 5.67 , 1 4.22 1.35 4.86 , 1 3.34 0.26 4.19 , 1 3.77 0.23 4.42 , 1 3.52 1.10 3.17 , 1 3.54 0.76 2.76 , 1 3.74 1.59 3.81 , 1 3.78 0.39 3.23 , 1 2.92 0.39 5.44 , 1 3.10 0.64 6.16 , 1 2.86 0.82 5.48 , 1 2.78 0.64 4.62 , 1 2.22 0.85 4.49 , 1 2.67 0.90 5.59 , 1 3.12 0.92 5.86 , 1 3.03 0.97 6.60 , 1 2.45 0.18 4.51 , 1 4.12 0.62 5.31 , 1 4.61 0.51 5.16 , 1 3.94 0.45 4.45 , 1 4.12 1.79 6.17 , 1 2.93 0.25 3.38 , 1 2.66 0.31 3.51 , 1 3.17 0.20 3.08 , 1 2.79 0.24 3.98 , 1 2.61 0.20 3.64 , 1 3.74 2.27 6.50 , 1 3.13 1.48 4.28 , 1 3.49 0.25 4.71 , 1 2.94 2.22 4.58 }; y = { 0.34, 0.11, 0.38, 0.68, 0.18, 0.0 , 0.08, 0.11, 1.53, 0.77, 1.17, 1.01, 0.89, 1.40, 1.05, 1.15, 1.49, 0.51, 0.18, 0.34, 0.36, 0.89, 0.91, 0.92, 1.35, 1.33, 0.23, 0.26, 0.73, 0.23}; /* Reading a SAS dataset into a matrix in IML Saves work (!), reduces mistakes, and allows you to check your calculations from the previous steps */ use reg1; read all var{x1 x2 x3 y} into a; /* read variables from SAS data set */ x1 = a[,1]; /* extract column 1 */ x2 = a[,2]; /* extract column 2 */ x3 = a[,3]; /* extract column 3 */ y = a[,4]; /* extract column 4 */ nobs = nrow(a); /* number of rows, equals N */ mu = J(nobs,1,1); /* create a column matrix of 1's */ x = mu || x1 || x2 || x3; /* horizontal concatenate to make X */ xtx = x` * x; /* Construct X`X */ xty = x` * y; /* Construct X`Y */ invxtx = inv(xtx); /* Inverse of X`X */ b = invxtx * xty; /* Vector of parameter estimates */ tss = y` * y; /* Total Sums of Squares */ ssr = b` * xty; /* Sums of Squares for the Model */ sse = tss - ssr; /* Residual Sums of Squares */ nobs = nrow(x); /* Number of observations = No rows of X */ rx = 4; /* Rank of X, r(X) = 4 */ dfe = nobs - rx; /* D.f.e = N - r(X) */ sumy = sum(y); /* Sum Y's */ ybar = sumy/nobs; /* Mean of Y */ cf = nobs * ybar * ybar; /* Correction Factor for the Mean */ ssrm = ssr - cf; /* Sums of Squares for the Model corrected for the Mean */ mse = sse/dfe; /* Mean Square Error = SSE/Dfe */ covb = invxtx * mse; /* Variance-covariance matrix of estimates */ seb0 = sqrt(covb[1,1]); /* s.e. of b0 */ seb1 = sqrt(covb[2,2]); /* s.e. of b1 */ seb2 = sqrt(covb[3,3]); /* s.e. of b2 */ seb3 = sqrt(covb[4,4]); /* s.e. of b3 */ t0 = b[1]/seb0; /* t-test, estimate/se */ pr = 2*(1 - probt(abs(t0),dfe)); /* probability */ t1 = b[2]/seb1; t2 = b[3]/seb2; t3 = b[4]/seb3; t4 = (b[1] - -0.5)/seb1; /* t-test of whether b1 = -0.5 */ pr = 2*(1-probt(abs(t4),dfe)); k = {0, 1, 0, 0}; /* k` matrix to calculate the SS for R(b1|b0,b2,b3), N.B. k' is a row vector, so k is a column vector! */ kb = k` * b; kxxk = k` * invxtx * k; invkk = ginv(kxxk); /* generalised inverse (just to be safe) */ ss1 = kb` * invkk * kb; ms1 = ss1/1; /* divide by 1 (redundant here), because 1 d.f. */ f1 = ms1/mse; pr = 1 - probf(f1,1,dfe); k = {0, 0, 1, 0}; /* k` matrix to calculate the SS for R(b2|b0,b1,b3) */ kb = k` * b; kxxk = k` * invxtx * k; invkk = ginv(kxxk); ss2 = kb` * invkk * kb; ms2 = ss2/1; f2 = ms2/mse; pr = 1 - probf(f2,1,dfe); k = {0, 0, 0, 1}; /* k` matrix to calculate the SS for R(b3|b0,b1,b2) */ kb = k` * b; kxxk = k` * invxtx * k; invkk = ginv(kxxk); ss3 = kb` * invkk * kb; ms3 = ss3/1; f2 = ms3/mse; pr = 1 - probf(f2,1,dfe); k = {0 0 0, 1 0 0, 0 1 0, 0 0 1}; /* k` matrix to calculate the SS for R(b1,b2,b3|b0) */ kb = k` * b; kxxk = k` * invxtx * k; invkk = ginv(kxxk); ss3 = kb` * invkk * kb; ms3 = ss3/3; f3 = ms3/mse; pr = 1 - probf(f3,3,dfe); yhat = x * b; /* Fitted values for each observation, Xb */ /* vecdiag extracts the diagonals from a matrix */ seyhat = sqrt(vecdiag(x * invxtx * x`) * mse); /* s.e. to fitted values */ /* Calculate average for x1 and x2, then estimate value for each x1 point, at an average value of x2, and vice versa */ x1bar = sum(a[,1])/nobs; x2bar = sum(a[,2])/nobs; xnew = x; xnew[,3] = x2bar; ynew = xnew * b; svnew = (1 + xnew * invxtx * xnew`)*mse; /* sampling variance for new value */ senew = sqrt(vecdiag(svnew)); quit; ods rtf close;