***------------------------------------------------------------***;
*This program is to compute Example 1 in the article
"Smoothed  Quantiles for Chi-square Type Test Statistics with Applications" by
Ke-Hai Yuan, Brenna Gomer, and Katerina M. Marcoulides;
*data are from Hozlinger & Swineford (1939);
*model is from Joreskog (1969) Table 1 (e);
*9-variable 3-factor with cross-loadings;
***------------------------------------------------------------***;
**input data;
data holz;
infile 'c:\data\holz9.dat';
input v1-v9;
run;
***------------------------------------------------------------***;

options linesize=100;
proc iml;
reset noname;
***------------------------------------------------------------***;
*Linearly smooth the 5th quantile of a chi-square 
distribution using the lower 10% order statistics,
and smooth 95th quantile of a chi-square 
distribution using the upper 10% order statistics;
start smooth(n, n_b,x, y, coef, hquant_5, hquant_95);
n_c=nrow(coef);
*x is the population quantile;
*y is the bootstrap quantile;
*y=a+b*x+e;
	n10=n*.1;
	n91=n-n10+1;
	n_b91=n_b-n91+1;
	x10=x[1:n10];
	y10=y[1:n10];
	x90=x[n91:n_b]; *top 10% treating non-convergence as on the top;
	y90=y[n91:n_b];
	xmat10=j(n10,1,1)||x10;
	xmat90=j(n_b91,1,1)||x90;
	xmat10_t=xmat10`;
	xmat90_t=xmat90`;
	hbeta10=inv(xmat10_t*xmat10)*xmat10_t*y10;
	hbeta90=inv(xmat90_t*xmat90)*xmat90_t*y90;
	hquant_5=hbeta10[1]*j(n_c,1,1)+coef[,1]*hbeta10[2];
	hquant_95=hbeta90[1]*j(n_c,1,1)+coef[,2]*hbeta90[2];
finish;
*====================================================;	
*The subroutine evaluate the 5 formulas of x_05 and x_95 
as in Tables 4 and 5 of the article;

start pred(N_r,df,f1_N5,f2_N5,f3_N5,f4_C5,f5_N5,
f1_C95,f2_C95,f3_N95,f4_N95,f5_N95);
N=N_r;
*predicting 5% quantile;
f1_N5=-1.7758+0.074123*log(df)
-0.0026038*log(N)*log(df)*log(df)
+(1.6359E-4)*log(N)*log(N)*log(df)*(df**(1/3));
      
f2_N5=-1.6971+.38677*log(df)/log(N)
+(2.7820E-5)*(df**(3/2))/(N**(1/6))
-.14667*(df**(1/3))/(N**(1/6));

f3_N5=-1.7886+0.084029*log(df)
-0.0032594*log(N)*log(df)*log(df)
+(2.2843E-4)*log(N)*log(N)*log(df)*(df**(1/3))
-(2.5002E-11)*N*log(N)*df*df;

f4_C5=-74.138+74.773*(df**(1/3))-24.526*(df**(1/6))*log(df)
+.40840*df*log(df)-.64950*(df**(7/6))+(1.8737E-5)*df*df
+.75171*(df**(2/3))/(N**(5/6))+3.0098*(df**(7/6))/(N**(5/3))
-6.0023*(df**(4/3))/(N*N);

f5_N5=-1.5934-.0010345*(N**(5/6))/((df**(1/3))*log(df))
-.031556*log(N)*log(N)/log(df)
+.045330*(N**(1/6))*log(N)/log(df)
+.010637*(N**(1/3))*log(N)/((df**(1/6))*log(df))
+(7.2387E-8)*(N**(11/6))/(df*log(df));

*predicting 95% quantile;
f1_C95=4.3385-21.422/(N*log(N))-33.654*(df**(1/3))
+58.396*sqrt(df)-27.459*df+9.0968*(df**(5/6))*log(df)
+2.2968*(df**(7/6))-(1.1549E-4)*df*df
-2.0644*sqrt(df)/(sqrt(N)*log(N));

f2_C95=4.2738-10.209/(N*log(N))-33.243*(df**(1/3))
+57.820*sqrt(df)-27.198*df+9.0159*(df**(5/6))*log(df)
+2.2741*(df**(7/6))-(1.1393E-4)*df*df
+(3.0050E-5)*df*df/((N**(1/3))*log(N))
-3.6995*sqrt(df)/(sqrt(N)*log(N))
+.68716*(log(df)*log(df))/(sqrt(N)*log(N));

f3_N95=1.6413-1.3614/(sqrt(N)*log(N));

f4_N95=1.6250-.86420/(N*log(N))+.0056051*log(df)
-(4.0489E-4)*log(df)*log(df)
-1.0297*(df**(1/6))/N+5.5384*log(df)*log(df)/(N**(5/3))
-10.076*(df**(5/6))*log(df)/(N**(11/6))
+11.465*(df**(3/2))/(N**(11/6))
-6.6793*(df**(5/3))/(N**(11/6))
+1.0516*((df/N)**(11/6));

f5_N95=.21864+1.0097*(N**(1/6))-.036540*log(N)*log(N)
-(4.1371E-5)*(N**(2/3))*log(N)+.19742/((df**(5/6))* log(df))
-.57443/(df**(11/6))-.012198*log(N)/sqrt(df)
+.0070765*(N**(1/3))/((df**(1/6))*log(df))
-(7.7312E-4)*(N**(1/3))*log(N)/(log(df)*log(df))
+.0037831*(N**(1/3))*log(N)/(df*df);
finish;

***--------------------------------------------------------------------***;
*subroutine to select the non-duplicated elements
of a symmetric matrix A, Va=vech(A);
***--------------------------------------------------------------------***;
start vech(A,Va);
l=0;
p=nrow(A);
ps=p*(p+1)/2;
Va=j(ps,1,0);
do i=1 to p;
        do j=i to p;
                l=l+1;
                Va[l]=A[j,i];
        end;
end;
finish;
***--------------------------------------------------------------------***;
* creates duplication matrix D_p as defined in Magnus&Neudecker;
***--------------------------------------------------------------------***;
start DP(p, dup);
  Dup=j(p*p,p*(p+1)/2,0);
  count=0;
  do j=1 to p;
    do i=j to p;
      count=count+1;
        if i=j then do;
                Dup[(j-1)*p+j, count]=1;
        end;
        else do;
                Dup[(j-1)*p+i, count]=1;
                Dup[(i-1)*p+j, count]=1;
        end;
    end;
  end;
finish;

***--------------------------------------------------------------------***;
*subroutine to evaluate sigma0=sigma(theta0)
and the Jacobian matrix (dsigma0/dtheta) for conducting 
Fisher-scoring algorithm, and the model sigma(theta) is 
the one in Example 1 of the article;

start sigdsig(theta0,vsig0,sig0,vdsig0);

lamb=j(9,3,0);
lamb[1,1]=1.0;
lamb[2,1]=theta0[1];
lamb[3,1]=theta0[2];
lamb[8,1]=theta0[3];
lamb[9,1]=theta0[4];

lamb[4,2]=1.0;
lamb[5,2]=theta0[5];
lamb[6,2]=theta0[6];

lamb[7,3]=1.0;
lamb[8,3]=theta0[7];
lamb[9,3]=theta0[8];
lamb_t=lamb`;

phi=j(3,3,0);
phi[1,1]=theta0[9];
phi[1,2]=theta0[10]; phi[2,1]=theta0[10];
phi[3,1]=0.0; phi[1,3]=0.0;
phi[2,2]=theta0[11];
phi[3,2]=theta0[12]; phi[2,3]=theta0[12];
phi[3,3]=theta0[13];

psi=theta0[14:22];

*computing sigma based on structured parameters;
sig0=lamb*phi*lamb_t+diag(psi);
run vech(sig0,vsig0);

*the following is to evaluate the Jabobian matrix (dsigma0/dtheta);
*dlamb is the derivative with Lambda;
phl=phi*lamb_t;
lph=lamb*phi;

dlamb=j(45,8,0);

do j=1 to 2;
        tt=j(9,3,0);
        tt[j+1,1]=1;
        ttt=tt*phl+lph*(tt`);
        run vech(ttt,tttt);
        dlamb[,j]=tttt;
end;
        tt=j(9,3,0);
        tt[8,1]=1;
        ttt=tt*phl+lph*(tt`);
        run vech(ttt,tttt);
        dlamb[,3]=tttt;

        tt=j(9,3,0);
        tt[9,1]=1;
        ttt=tt*phl+lph*(tt`);
        run vech(ttt,tttt);
        dlamb[,4]=tttt;

do j=5 to 6;
        tt=j(9,3,0);
        tt[j,2]=1;
        ttt=tt*phl+lph*(tt`);
        run vech(ttt,tttt);
        dlamb[,j]=tttt;
end;

do j=7 to 8;
        tt=j(9,3,0);
        tt[j+1,3]=1;
        ttt=tt*phl+lph*(tt`);
        run vech(ttt,tttt);
        dlamb[,j]=tttt;
end;

*dphi is the derivative with phi;
dphi=j(45, 5, 0);
tt=j(3,3,0);
tt[1,1]=1;
ttt=lamb*tt*lamb_t;
run vech(ttt,tttt);
dphi[,1]=tttt;

tt=j(3,3,0);
tt[1,2]=1; tt[2,1]=1;
ttt=lamb*tt*lamb_t;
run vech(ttt,tttt);
dphi[,2]=tttt;

tt=j(3,3,0);
tt[2,2]=1;
ttt=lamb*tt*lamb_t;
run vech(ttt,tttt);
dphi[,3]=tttt;

tt=j(3,3,0);
tt[2,3]=1; tt[3,2]=1;
ttt=lamb*tt*lamb_t;
run vech(ttt,tttt);
dphi[,4]=tttt;

tt=j(3,3,0);
tt[3,3]=1;
ttt=lamb*tt*lamb_t;
run vech(ttt,tttt);
dphi[,5]=tttt;


*dpsi is the derivative with psi;
dpsi=j(45,9,0);
do j=1 to 9;
        tt=j(9,9,0);
        tt[j,j]=1;
        run vech(tt,vtt);
        dpsi[,j]=vtt;
end;

vdsig0=dlamb||dphi||dpsi;

finish;

*-------------------------------------------------------;
*Estimating the structural model using Fisher-scoring algorithm;

start Est(n, p, scov, theta0, Dup, Sig0, T_ml, div);
ep=.00001;
*ep=E-5 is the convergence criterion;
ep1=.00000000000000000001; 
*ep1=E-20, samples having smallest eigenvalue of the 
information matrix smaller than ep1 are discarded; 
ps=p*(p+1)/2;
q=nrow(theta0);
df=ps-q;

run vech(scov,vscov);
dup_t=dup`;
it_max=100;
***** Fisher-scoring begins here;
delta=1;
div=1;
do i=1 to it_max while (delta>ep);
	run sigdsig(theta0,vsig0,Sig0,vdsig);
	sig_in=inv(Sig0);
	** weight given by normal theory ;
	weight=0.5*dup_t*(sig_in@sig_in)*dup;

	vdsig_t=vdsig`;
	dsw=vdsig_t*weight;
	dwd=dsw*vdsig;
	ddval=eigval(dwd);	
	if ddval[q]>ep1 then do;
		dwd_in=inv(dwd);
		res=vscov-vsig0;
		dtheta=dwd_in*dsw*res;
		theta0=theta0+dtheta;
		delta=max(abs(dtheta));
	end;
	else do;
		i=it_max+1;
	end;
end;
if i<it_max then do;
	div=0;
	*test stat;
	Ssig_in=Scov*sig_in;
	f_ml=trace(ssig_in)-log(det(ssig_in))-p;
	T_ml=(n-1)*f_ml;
end;

finish;

*--------------------------------------------------------;
*Bootstrap sampling from x;
start bootstr(x, theta_0, dup, seed, N_r, n_conv, T_b, n_div);
  n=nrow(x);
  p=ncol(x);
  ps=p*(p+1)/2;
  T_b=j(N_r,1,0);
  n_conv=0;
  n_div=0;
  P_mat=(i(n)-j(n,n,1)/n)/(n-1);
  do b=1 to N_r;
	U=ranuni(J(n,1,seed));
	Ind_b=int(n*U+j(n,1,1));
	X_b=x[Ind_b,]; *bootstrap sample;
	scov_b=(X_b`)*P_mat*X_b;
	theta0=theta_0;
	run Est(n, p, scov_b, theta0, Dup, Sigma0, T_ml, div);
	n_div=n_div+div;
 	if div=0 then do;
 		n_conv=n_conv+1;
    	T_b[n_conv]=T_ml;
    end;
  end;
  T_b=T_b[1:n_conv];
finish;
*--------------------------------------------------;
*Order the statistics of the bootstrap replications;
start order(n_conv, T_b);
  do i=1 to n_conv-1;
        do j=i+1 to n_conv;
                if T_b[i]>T_b[j] then do;
                        tt=T_b[i];
                        T_b[i]=T_b[j];
                        T_b[j]=tt;
                end;
        end;
  end;
finish;
**--------------------------------------------------------------------**;
*obtain the lower 5% critical value under Sigma_a using bootstrap;

start c_alpha(N_r,a,dup,x_c,Scov,Scov12_in, theta_0,Sig0,seed,
q_vec1,q_vec2,coef_norm,qc_5);
Sig_a=a*Scov+(1-a)*Sig0;
call eigen(s_aval,s_avec,Sig_a);
sig12_a=s_avec*diag(sqrt(s_aval))*(s_avec`);

x0=x_c*(Scov12_in*sig12_a);

**Obtain critical value of the statistic under null hypothesis;
run bootstr(x0, theta_0, dup, seed, N_r, n_conv, T_b,n_div);

*H_0: ep>.05, rejection occurs when T_ml<=c_a(epsilon=.05), 
where c_a is the critical value of (T_ml|epsilon=.05);
call sort(T_b, 1);
ncrit5=int(.05*N_r+.5); *(N_r=n_conv+n_div);
ncrit95=int(.95*N_r+.5);
sc_5=T_b[ncrit5];
sc_95=T_b[ncrit95];

*----------------------;
*smoothing critical value;
y=T_b;
n_b=n_conv;
run smooth(N_r,n_b,q_vec1, y, coef_norm,hquant_n5,hquant_n95);
run smooth(N_r,n_b,q_vec2, y, coef_norm, hquant_c5,hquant_c95);
qc_5=n_b||sc_5||(hquant_n5[1:3]`)||hquant_c5[4]||hquant_n5[5];
qc_95=n_b||sc_95||(hquant_c95[1:2]`)||(hquant_N95[3:5]`);

finish;

***--------------------------------------------------------------------***;
*** The following is the main program;
***--------------------------------------------------------------------***;
start main;
print "--------------------------------------------------------------------";
print "Data is from Holzinger and Swinford (1939)";
print "rescaled to have SD less than 2";
print "9-variables, 3-factor, loadings fixed at 1.0";
print "Variable 8 and 9 also load on factor 1";
print "---------------------------------------------------------";
** 26 variables but we only use 9-variable out of them;
use holz;
read all var _num_ into xmatr;

x=xmatr;
n=nrow(x);
p=ncol(x);
ps=p*(p+1)/2;


*resclae data by sc to make SD less than 2;
sc={6.0 4.0 8.0 3.0 4.0 7.0 23.0 20.0 36.0};
rsc=diag(j(1,p,1)/sc);
x=x*rsc;
Scov=x`*(i(n)-j(n,n,1)/n)*x/(n-1);

*starting value using MLE;

theta_00={
0.589  0.807  0.336  0.655  1.044  0.973  0.851  0.544  0.650  0.375  0.785
0.115  0.632  0.495  0.828  0.561  0.295  0.342  0.367  0.334  0.352  0.408};

theta_00=theta_00`;
q=nrow(theta_00);
df=ps-q;

theta_0=theta_00;
run Dp(p,dup);
*Estimate the model by Normal-distribution ML;
run Est(n, p, Scov, theta_0, Dup, Sig0, T_ml, div);
if div=0 then do; *converged;
	sig_0=sig0;
	print "Model chi-square T_ml=" T_ml;
end;
N_r=500; *number of bootstrap replications;

print "sample size=" n;
print "bootstrap rep=" N_r;
*---------------;
*compute the values of x_05 and x_95 as functions of
N_r and df according to the formulas in Tables 4 and 5;
run pred(N_r,df,f1_N5,f2_N5,f3_N5,f4_C5,f5_N5,
f1_C95,f2_C95,f3_N95,f4_N95,f5_N95);

coef_norm=
(f1_N5//f2_N5//f3_N5//f4_C5//f5_N5)||
(f1_C95//f2_C95//f3_N95//f4_N95//f5_N95);

*For smoothing critical value;
q_vec1=j(n_r,1,0);
q_vec2=j(n_r,1,0);
do i=1 to n_r;
	p_i=i/(n_r+1);
	q1_i=probit(p_i);
	q_vec1[i]=q1_i;
	q2_i=cinv(p_i,df);
	q_vec2[i]=q2_i;
end;

seed=1111111111;
print "-----------------seed=1111111111-----------------";
** Standardize data to satisfy the null model Sigma(theta);
barx=j(1,n,1)*x/n;
x_c=x-j(n,1,1)*barx;
call eigen(sval,svec,Scov);
Scov12_in=svec*diag(j(p,1,1)/sqrt(sval))*svec`; *the Sigma^{-1/2};

*1 start with a=0, T_ml will be greater than the c_alpha(a=0),
c_alpha(a=0)<T_mlc_alpha(a=1);
*increase a by .1 each time, 
*2 when T_ml<c_alpha, then decrease a by .01;
*3 when T_ml>c_alpha increase by .001; 
*if T_ml is smaller than c_alpha0 then ep_0 is too large, 
decrease a by .1;
*if T_ml is greater than c_alpha1 then ep_0 is too small, increase a;
*--------------------------------;
*The following code uses the bootstrap approach to solve 
equation (8) as described in the section 
"Bootstrap Approach to Equivalence Testing 
for Mean and Covariance Structural Models";

result_1=j(10,9,0); *contain the results when h changes by .1;
result_2=j(10,9,0); *contain the results when h changes by .01;
result_3=j(10,9,0); *contain the results when h changes by .001;
result_4=j(10,9,0); *contain the results when h changes by .0001;
hat_c5=0;
a=0; N_c1=0;

*qc_5=n_b||sc_5||(hquant_n5[1:3]`)||hquant_c5[4]||hquant_n5[5];
do i=1 to 10 while (hat_c5<T_ml);
	a=a+.1;
	theta_0=theta_00;
	run c_alpha(N_r,a,dup,x_c,Scov,Scov12_in,theta_0,Sig0,seed,
		q_vec1,q_vec2,coef_norm,qc_5);
	result_1[i,]=i||a||qc_5;
	N_c1=N_c1+1;
	hat_c5=qc_5[3];
end;
result_1=result_1[1:N_c1,];

N_c2=0;
do i=1 to 10 while (hat_c5>T_ml);
	a=a-.01;
	theta_0=theta_00;
	run c_alpha(N_r,a,dup,x_c,Scov,Scov12_in,theta_0,Sig0,seed,
		q_vec1,q_vec2,coef_norm,qc_5);
	result_2[i,]=i||a||qc_5;
	N_c2=N_c2+1;
	hat_c5=qc_5[3];
end;
result_2=result_2[1:N_c2,];

N_c3=0;
do i=1 to 10 while (hat_c5<T_ml);
	a=a+.001;
	theta_0=theta_00;
	run c_alpha(N_r,a,dup,x_c,Scov,Scov12_in,theta_0,Sig0,seed,
		q_vec1,q_vec2,coef_norm,qc_5);
	result_3[i,]=i||a||qc_5;
	N_c3=N_c3+1;
	hat_c5=qc_5[3];
end;
result_3=result_3[1:N_c3,];

N_c4=0;
do i=1 to 10 while (hat_c5>T_ml);
	a=a-.0001;
	theta_0=theta_00;
	run c_alpha(N_r,a,dup,x_c,Scov,Scov12_in,theta_0,Sig0,seed,
		q_vec1,q_vec2,coef_norm,qc_5);
	result_4[i,]=i||a||qc_5;
	N_c4=N_c4+1;
	hat_c5=qc_5[3];
end;
result_4=result_4[1:N_c4,];

*qc_5=n_b||sc_5||(hquant_n5[1:3]`)||hquant_c5[4]||hquant_n5[5];
print "----------------------------------------";
print "i1, a, qc_5(n_conv,sc_5,fn5_1,fn5_2,fn5_3,fc5_4,fn5_5)";
print result_1;

print "----------------------------------------";
print "i2, a, qc_5(n_conv,sc_5,fn5_1,fn5_2,fn5_3,fc5_4,fn5_5)";
print result_2;

print "----------------------------------------";
print "i3, a, qc_5(n_conv,sc_5,fn5_1,fn5_2,fn5_3,fc5_4,fn5_5)";
print result_3;

print "----------------------------------------";
print "i4, a, qc_5(n_conv,sc_5,fn5_1,fn5_2,fn5_3,fc5_4,fn5_5)";
print result_4;
print "----------------------------------------";

a_1=result_1[N_c1,2];
Sig_a1=a_1*Scov+(1-a_1)*Sig_0;
theta_0=theta_00;
run Est(n, p, Sig_a1, theta_0, Dup, Sig0, T_ml, div);
if div=0 then do;
	F_mla=T_ml/(N-1);
	RMSEA_a=sqrt(F_mla/df);
	print "a_1, T_mla, F_mla, RMSEA_a=" a_1 T_ml F_mla RMSEA_a;
end;

a_2=result_2[N_c2,2];
Sig_a2=a_2*Scov+(1-a_2)*Sig_0;
theta_0=theta_00;
run Est(n, p, Sig_a2, theta_0, Dup, Sig0, T_ml, div);
if div=0 then do;
	F_mla=T_ml/(N-1);
	RMSEA_a=sqrt(F_mla/df);
	print "a_2, T_mla, F_mla, RMSEA_a=" a_2 T_ml F_mla RMSEA_a;
end;

a_3=result_3[N_c3,2];
Sig_a3=a_3*Scov+(1-a_3)*Sig_0;
theta_0=theta_00;
run Est(n, p, Sig_a3, theta_0, Dup, Sig0, T_ml, div);
if div=0 then do;
	F_mla=T_ml/(N-1);
	RMSEA_a=sqrt(F_mla/df);
	print "a_3, T_mla, F_mla, RMSEA_a=" a_3 T_ml F_mla RMSEA_a;
end;

a_4=result_4[N_c4,2];
Sig_a4=a_4*Scov+(1-a_4)*Sig_0;
theta_0=theta_00;
run Est(n, p, Sig_a4, theta_0, Dup, Sig0, T_ml, div);
if div=0 then do;
	F_mla=T_ml/(N-1);
	RMSEA_a=sqrt(F_mla/df);
	print "a_4, T_mla, F_mla, RMSEA_a=" a_4 T_ml F_mla RMSEA_a;
end;

finish;

run main;