/* SAS program for the estimation of the MED using the likelihood ratio test under total order of Bartholomew. The algorithm of Sun is adapted to obtain the exact distribution of the LRT (also in the unbalanced case). The analysis can be performed for one BY-variable and up to 11 treatment groups. Either the pooled variances for the whole step-down procedure can be used or they can be calculated anew at each step. Author : Frank Bretz (parts of the code are adapted from Dirk Seidel and Martin Burke) Contact: bretz@ifgb.uni-hannover.de Date : 04.12.99 - start version 14.04.03 - bug fix in the LRT module, sum(n) replaced by nu+k (thanks to Martin Hellmich) Input : ds_in : Input data set ds_out: Output data set by : BY variable var : endpoint pooled: use of pooled variance at all steps (=1) or not (=0, default) alpha : type-I-error (default: 0.05) Output : dataset &ds_out, containing the variables BY - BY-groups P_VALUE - p-values for each step COMPARE - treatment groups included in the step VAR - variance DF - degrees of freedom */ /* Example call: ************* options nosource nocenter nonotes nomlogic nomprint nosymbolgen; data testin; input by dose var @@; cards; 1 1 12 1 1 3 1 1 4 1 1 12 1 1 5 1 1 3 1 2 4 1 2 5 1 2 3 1 2 6 1 2 3 1 3 15 1 3 13 1 3 18 1 3 16 1 3 10 1 4 28 1 4 29 1 4 29 1 4 30 2 1 9 2 1 19 2 1 29 2 1 38 2 1 10 2 1 12 2 2 19 2 2 28 2 2 38 2 2 20 2 2 18 2 3 21 2 3 29 2 3 26 2 3 20 2 3 37 2 3 40 2 4 29 2 4 30 2 4 45 2 4 38 3 3 0 3 3 2 3 3 3 3 3 3 3 3 1 3 4 13 3 4 14 3 4 12 3 4 19 3 4 14 3 4 17 3 5 20 3 5 23 3 5 24 3 5 22 3 7 30 3 7 33 3 7 38 3 7 30 ; run; %separ(ds_in=testin,ds_out=testout,by=by dose,var=var,pooled=0,alpha=0.05); */ /********************************************************************************/ /* */ /* %separ decomposes the input datasets correspondingly to the BY-variables and */ /* continues the analysis with the individual parts. Finally the results are */ /* recombined according to the BY-values. */ /* */ /********************************************************************************/ %macro separ(ds_in=,ds_out=,by=,var=,pooled=0,alpha=0.05); proc datasets; delete &ds_out; run; /*************************************/ /* Extraction of the datasets */ /*************************************/ %macro extract; data _extract; set &ds_in; by &by; if _n_ ge &start then do; output; if last.&by2 then do; call symput('start',_n_+1); stop; end; end; run; %mend extract; %local start by1 by2 rest_by; %let start=1; proc sort data=&ds_in; by &by; /*************************************/ /* Decomposing the BY-values */ /*************************************/ data _null_; length reverse rest_by $ 200; reverse=reverse(trim(left("&by"))); by1=reverse(scan(reverse,1)); by2=reverse(scan(reverse,2)); if by2='' then rest_by=''; else rest_by=left(reverse(substr(left(reverse),length(by1)+1))); call symput('by1',by1); call symput('by2',by2); call symput('rest_by',rest_by); run; /*************************************/ /* Loop over all BY-levels */ /*************************************/ %do %until(&stop); /**************************************************************/ /* If no BY-groups are specified, all data are taken over, */ /* otherwise the data are extracted accoring to the BY-groups */ /**************************************************************/ %if &by2 eq %then %do; data _extract; set &ds_in; run; %end; %else %extract; %let stop=1; /**************************************************************/ /* If BY-groups are specified, the data step is performed as */ /* long as the data were extracted */ /**************************************************************/ %if &by2 ne %then %do; data _Null_; set _extract; call symput('stop',0); stop; run; %end; /**************************************************************/ /* Call of %stepdown (at least once for each run of %separ) */ /**************************************************************/ %if (&by2 eq or not &stop) %then %do; %stepdown(ds_in=_extract,ds_out=barthres,by=&by1, var=&var,pooled=&pooled,alpha=&alpha); /**********************************************************/ /* Remember the values of the higher order BY-variables */ /**********************************************************/ %if &by2 ne %then %do; data _extract(keep=&rest_by); set _extract; output; stop; data barthres; if _N_=1 then set _extract; set barthres; run; %end; proc append base=&ds_out data=barthres; run; %end; %end; /**********************************************************/ /* Fitting the variable length */ /**********************************************************/ %local len_comp; data _null_; retain len_comp 0; set &ds_out end=eof; len_comp=max(length(trim(left(compare))),len_comp); if eof then call symput('len_comp',len_comp); data &ds_out; length compare $ &len_comp; set &ds_out; run; proc print data=&ds_out; run; %mend separ; /********************************************************************************/ /* */ /* %stepdown performs the step-down procedure and calls at each step the */ /* Bartholomew macro. If the p-value is smaller than alpha, the highest group */ /* is discarded for the next step. The macro stops if either only one treatment */ /* group remains or if one p-value is larger than the pre-specified level alpha */ /* */ /********************************************************************************/ %macro stepdown(ds_in=,ds_out=,by=,var=,pooled=,alpha=); /**************************************************************************/ /* Calculation of group means, sample size and possibly the variance */ /**************************************************************************/ %macro prep1(ds_in=,ds_barth=,ds_desc=,by=,var=,pool=,alpha=); /***********************************************************************/ /* Calculation of mean for each group */ /***********************************************************************/ proc univariate data = &ds_in noprint; var &var; by &by; output out = &ds_barth n=n mean=xbar; /***********************************************************************/ /* Definition of macro variables for later use */ /***********************************************************************/ data _Null_; length compare $ 200; retain compare ''; set &ds_barth end=eof; compare=trim(left(compare))||' '||trim(left(&by)); if eof then do; call symput('lastby',trim(left(&by))); call symput('anzgrp',_n_); call symput('compare',compare); end; run; /***********************************************************************/ /* Calculation of variance and degrees of freedom; is computed anew at */ /* each step, if not the pooled version is used */ /***********************************************************************/ %if (not &pool or &varianz eq ) %then %do; proc univariate data = &ds_in noprint; var &var; by &by; output out = &ds_desc var=varianz n=anz_val; data &ds_desc; set &ds_desc; varianz2=varianz*(anz_val-1); anz_val2 =anz_val-1; proc univariate noprint; var varianz2 anz_val anz_val2; output out = &ds_desc sum=var1 sum=var2 sum=var3; data &ds_desc; set &ds_desc; varianz=var1/var3; anz_val=var2; drop var1 var2 var3; data _Null_; set &ds_desc; call symput('varianz',varianz); call symput('df',anz_val-&anzgrp); run; %end; %mend prep1; %local signif lastby anzgrp varianz df compare barth stepdown desc; /**************************************************************************/ /* Giving names for temporary datasets */ /**************************************************************************/ %let stepdown=_stepdwn; * single values; %let barth=_barth; * means; %let desc=_desc; * joint variance and sample size; proc datasets; delete &ds_out; data &stepdown; set &ds_in; run; /**************************************************************************/ /* Loop is run until p > alpha or less than 2 groups remain */ /**************************************************************************/ %let signif=1; %do %while(&signif); %prep1(ds_in=&stepdown,ds_desc=&desc,ds_barth=&barth, by=&by,var=&var,pool=&pooled,alpha=&alpha); %barth(ds_barth=&barth,varianz=&varianz,df=&df); /**********************************************************************/ /* Appending the comparisons, the variance and the df to the p-values */ /**********************************************************************/ data &barth; set &barth; length compare $ 200; retain compare "&compare" var &varianz df &df; proc append base=&ds_out data=&barth; data _null_; set &barth; if p_value gt &alpha then call symput('signif',0); run; %if &signif %then %do; /******************************************************************/ /* Discarding the highest treatment group */ /******************************************************************/ %if (&anzgrp gt 2 ) %then %do; data &stepdown; set &stepdown; if trim(left(&by)) ne trim(left("&lastby")); run; %end; %else %let signif=0; %end; %end; %mend stepdown; /********************************************************************************/ /* */ /* %barth performs the calculation of the p-value for the likelihood ratio test */ /* of Bartholomew (1961) for unknown but equal variances in the general */ /* unbalanced one-way layout. The code follows the formulas of Sun (1988). */ /* */ /********************************************************************************/ %macro barth(ds_barth=,varianz=,df=); proc iml symsize=250000; /**************************************************************************/ /* Calculation of orthant probabilities with Jacobi correlation matrix */ /**************************************************************************/ start orthant(nn,gew); n=nn-1; if n>1 then do; r=j(1,n-1,0); do i=1 to n-1; r[i]=-sqrt(gew[i]*gew[i+2]/( (gew[i]+gew[i+1])* (gew[i+1]+gew[i+2]))); end; end; coef=j(1,7,.); pai=3.141592653589793238; do i=1 to int(n/2)+1; coef[i]=1/(2**(n-i+1)*pai**(i-1)); end; if n=1 then pr=coef[1]; else if n=2 then pr=coef[1]+coef[2]*arsin(r[1]); else if n=3 then pr=coef[1]+coef[2]*(arsin(r[1])+arsin(r[2])); else if n=4 then pr=coef[1]+coef[2]*sasin(r,4) +coef[3]*fint(r,4); else if n=5 then pr=coef[1]+coef[2]*sasin(r,5) +coef[3]*si4(r,5); else if n=6 then pr=coef[1]+coef[2]*sasin(r,6) +coef[3]*si4(r,6) +coef[4]*fint(r,6); else if n=7 then pr=coef[1]+coef[2]*sasin(r,7) +coef[3]*si4(r,7) +coef[4]*si6(r,7); else if n=8 then pr=coef[1]+coef[2]*sasin(r,8) +coef[3]*si4(r,8) +coef[4]*si6(r,8) +coef[5]*fint(r,8); else if n=9 then pr=coef[1]+coef[2]*sasin(r,9) +coef[3]*si4(r,9) +coef[4]*si6(r,9) +coef[5]*si8(r,9); else if n=10 then pr=coef[1]+coef[2]*sasin(r,10)+coef[3]*si4(r,10)+coef[4]*si6(r,10)+coef[5]*si8(r,10)+coef[6]*fint(r,10); else if n=11 then pr=coef[1]+coef[2]*sasin(r,11)+coef[3]*si4(r,11)+coef[4]*si6(r,11)+coef[5]*si8(r,11)+coef[6]*si10(r,11); return(pr); finish; start sasin(r,n); sasin=0; do i=1 to n-1; sasin=sasin+arsin(r[i]); end; return(sasin); finish; start si4(r,n); si4=0; r1=dis(r,8,3,n-3); do i=1 to n-3; p=tran(r1,8,3,i); si4=si4+fint(p,4); end; do i=1 to n-4; sum=0; do j=i+3 to n-1; sum=sum+arsin(r[j]); end; si4=si4+arsin(r[i])*sum; end; return(si4); finish; start si6(r,n); si6=0; r1=dis(r,8,3,n-3); r2=dis(r,6,5,n-5); do i=1 to n-5; p=tran(r2,6,5,i); si6=si6+fint(p,6); end; do i=1 to n-6; sum=0; do j=i+5 to n-1; sum=sum+arsin(r[j]); end; p=tran(r1,8,3,i); si6=si6+fint(p,4)*sum; end; do i=4 to n-3; sum=0; do j=1 to i-3; sum=sum+arsin(r[j]); end; p=tran(r1,8,3,i); si6=si6+fint(p,4)*sum; end; do i=1 to n-7; sum=0; do j=i+3 to n-4; sum1=0; do k=j+3 to n-1; sum1=sum1+arsin(r[k]); end; sum=sum+arsin(r[j])*sum1; end; si6=si6+arsin(r[i])*sum; end; return(si6); finish; start si8(r,n); si8=0; r1=dis(r,8,3,n-3); r2=dis(r,6,5,n-5); r3=dis(r,4,7,n-7); do i=1 to n-7; p=tran(r3,4,7,i); si8=si8+fint(p,8); end; do i=1 to n-8; sum=0; do j=i+7 to n-1; sum=sum+arsin(r[j]); end; p=tran(r2,6,5,i); si8=si8+fint(p,6)*sum; end; do i=4 to n-5; sum=0; do j=1 to i-3; sum=sum+arsin(r[j]); end; p=tran(r2,6,5,i); si8=si8+fint(p,6)*sum; end; do i=1 to n-8; sum=0; do j=i+5 to n-3; p=tran(r1,8,3,j); sum=sum+fint(p,4); end; p=tran(r1,8,3,i); si8=si8+fint(p,4)*sum; end; do i=1 to n-9; sum=0; do j=i+5 to n-4; sum1=0; do k=j+3 to n-1; sum1=sum1+arsin(r[k]); end; sum=sum+arsin(r[j])*sum1; end; p=tran(r1,8,3,i); si8=si8+fint(p,4)*sum; end; do i=7 to n-3; sum=0; do j=1 to i-6; sum1=0; do k=j+3 to i-3; sum1=sum1+arsin(r[k]); end; sum=sum+arsin(r[j])*sum1; end; p=tran(r1,8,3,i); si8=si8+fint(p,4)*sum; end; sum=0; do i=9 to n-1; sum=sum+arsin(r[i]); end; p=tran(r1,8,3,4); si8=si8+arsin(r[1])*fint(p,4)*sum; if n>10 then do; sum=0; do i=1 to n-9; sum=sum+arsin(r[i]); end; p=tran(r1,8,3,5); si8=si8+arsin(r[10])*fint(p,4)*sum; si8=si8+arsin(r[1])*arsin(r[4])*arsin(r[7])*arsin(r[10]); end; return(si8); finish; start si10(r,n); si10=0; r1=dis(r,8,3,n-3); r2=dis(r,6,5,n-5); r3=dis(r,4,7,n-7); r4=dis(r,2,9,n-9); do i=1 to n-9; p=tran(r4,2,9,i); si10=si10+fint(p,10); end; p=tran(r3,4,7,4); q=tran(r3,4,7,1); si10=si10+arsin(r[1])*fint(p,8)+arsin(r[10])*fint(q,8); p=tran(r1,8,3,1); q=tran(r2,6,5,6); si10=si10+fint(p,4)*fint(q,6); p=tran(r1,8,3,8); q=tran(r2,6,5,1); si10=si10+fint(p,4)*fint(q,6); return(si10); finish; start fint(r,m); a=j(1,19,.); b=j(1,11,.); c=j(1,7,.); p=j(1,20,.); gl=j(2,12,.); iint=12; g1={-0.981560634246719251 -0.904117256370474857 -0.769902674194304687 -0.587317954286617447 -0.367831498998180194 -0.125233408511463915, 0.0471753363865118272 0.106939325995318431 0.160078328543346226 0.203167426723065922 0.233492536538354809 0.249147045813402785}; do i=1 to iint/2; gl[1,i]=0.5*(g1[1,i]+1); gl[1,iint+1-i]=0.5*(-g1[1,i]+1); gl[2,i]=0.5*g1[2,i]; gl[2,iint+1-i]=gl[2,i]; end; i=1; do j=1 to m+m-3 by 2; p[j]=1; p[j+1]=r[i]; i=i+1; end; p[m+m-1]=1; sum1=0; k1=0; krit=0; do until(k1=iint | krit=1); k1=k1+1; if m<=8 then do; do j=1 to 15; a[j]=p[j]; end; end; else do; ta=1-(p[2]*gl[1,k1])**2; a[1]=ta-p[4]*p[4]; do j=2 to 18 by 2; a[j]=p[j+2]*ta; a[j+1]=ta; end; end; sum2=0; k2=0; do until(k2=iint | krit=1); k2=k2+1; if m<=6 then do; do j=1 to 11; b[j]=p[j]; end; end; else do; ub=a[1]*a[3]-(a[2]*gl[1,k2])**2; ub1=a[5]*ub; b[1]=ub1-a[1]*a[4]*a[4]; do j=2 to 10 by 2; b[j]=a[j+2]*ub; b[j+1]=ub1; end; end; sum3=0; k3=0; do until(k3=iint | krit=1); k3=k3+1; if m=4 then do; do j=1 to 7; c[j]=p[j]; end; end; else do; vc=b[1]*b[3]-(b[2]*gl[1,k3])**2; vc1=b[5]*vc; c[1]=vc1-b[1]*b[4]*b[4]; do j=2 to 6 by 2; c[j]=b[j+2]*vc; c[j+1]=vc1; end; end; sum4=0; do k4=1 to iint; wd=c[1]*c[3]-(c[2]*gl[1,k4])**2; wd1=c[5]*wd; d11=wd1-c[1]*c[4]*c[4]; d12=c[6]*wd; d22=wd1; yyy=d11*d22; if yyy<0 then yyy=1E-09; xxx=d12/sqrt(yyy); if xxx<-1 then xxx=-1+1E-09; sum4=sum4+gl[2,k4]/sqrt(wd)*arsin(xxx); end; sum4=c[2]*sum4; if m^=4 then sum3=sum3+gl[2,k3]/sqrt(vc)*sum4; else krit=1; end; if m^=4 then do; sum3=b[2]*sum3; if m^=6 then sum2=sum2+gl[2,k2]/sqrt(ub)*sum3; else krit=1; end; end; if (m^=4 & m^=6) then do; sum2=a[2]*sum2; if m^=8 then sum1=sum1+gl[2,k1]/sqrt(ta)*sum2; else krit=1; end; end; if m=4 then return(sum4); else if m=6 then return(sum3); else if m=8 then return(sum2); else return(p[2]*sum1); finish; start dis(r,m,n,k); rr=j(k,n,.); do i=1 to k; do j=1 to n; rr[i,j]=r[i-1+j]; end; end; return(rr); finish; start tran(rr,m,n,k); r=j(1,n,.); do i=1 to n; r[i]=rr[k,i]; end; return(r); finish; /**************************************************************************/ /* Calculation of level probabilities */ /**************************************************************************/ start plkwunba(gewicht,p_lkw); k=ncol(gewicht); p_1_k=j(k-1,k-1,1); p_1_k[2,]=j(1,k-1,0.5); p_lkw=j(k,1,0); do t=3 to k; do i=2 to t; mengen=0; anz=erzall(t,i, mengen); nrowmen=nrow(mengen); if t1 then do; do s=1 to nrowmen; gew=j(1,i,0); top=0; prod=1; do u=1 to i; gew[u]=sum(gewicht[top+1:top+mengen[s,u]]); if mengen[s,u]=2 then prod=prod*0.5; else if mengen[s,u]>2 then prod=prod*p_1_k[mengen[s,u],top+1]; top=top+mengen[s,u]; end; if i=2 then z=0.5; else z=orthant(i,gew); if s=1 then y=z; else y=y//z; if t1 then do; do s=1 to nrowmen; gew=j(1,i,0); top=0; prod=1; do u=1 to i; gew[u]=sum(gewi[top+1:top+mengen[s,u]]); if mengen[s,u]=2 then prod=prod*0.5; else if mengen[s,u]>2 then prod=prod*p_1_k[mengen[s,u],top+j-1+1]; top=top+mengen[s,u]; end; if i=2 then z=0.5; else z=orthant(i,gew); if s=1 then y=z; else y=y//z; p_1_k[t,j]=p_1_k[t,j]-z*prod; end; end; end; end; end; finish; start erzall(n,k,mengen); feld1=j(1,k,0); first=0; mtc1=0; t=0; h=0; do while(mtc1<2); i=nexcom2(n, k, feld1, mtc1,t,h); if all(feld1) then if first=0 then do; mengen=feld1; first=1; end; else mengen=mengen//feld1; end; return(nrow(mengen)); finish; start nexcom2(n_ges, c, feld, mtc, t, h); if (mtc=0) then do; feld[1]=n_ges; if(c>1)then do i=2 to c; feld[i]=0; end; t=n_ges; h=0; end; else goto marke2; marke1: if (feld[c]=n_ges) then mtc=2; else mtc=1; return(1); marke2: if (t>1) then h=0; h=h+1; t=feld[h]; feld[h]=0; feld[1]=t-1; feld[h+1]=feld[h+1]+1; goto marke1; finish; /**************************************************************************/ /* Calculation of p-values */ /**************************************************************************/ start LRT(x,s,n,nu); k=ncol(n); m=MLE(x,n); xquer=sum(x#n/n[+]); x1=sum(n#(m-xquer)##2); x2=sum(n#(m-x)##2); t=x1/(x1+x2+nu*s); if k=2 then p_lkw=j(1,2,.5); else run plkwunba(n,p_lkw); p=0; do i=2 to k; p=p+p_lkw[i]*(1-probf(t*(nu+k-i)/((i-1)*(1-t)),i-1,nu+k-i)); end; return(p); finish; start MLE(x,n); k=ncol(x); m=j(1,k,.); a=j(1,k,.); c=j(1,k,.); do i=1 to k; do u=1 to i; do v=i to k; a[v]=sum(x[u:v]#n[u:v])/sum(n[u:v]); end; c[u]=min(a); a=j(1,k,.); end; m[i]=max(c); c=j(1,k,.); end; return(m); finish; use &ds_barth; read all var _all_; close &ds_barth; x=t(xbar); n=t(n); s=&varianz; df=&df; p_value=LRT(x,s,n,df); * print ' P-value from IML:' p_value [format=7.5]; create &ds_barth var {p_value}; append; quit; %mend barth;