***------------------------------------------------------------***; *This program is to compute Example 1 in the article "Smoothed Quantiles for Chi-square Type Test Statistics with Applications" by Ke-Hai Yuan, Brenna Gomer, and Katerina M. Marcoulides; *data are from Hozlinger & Swineford (1939); *model is from Joreskog (1969) Table 1 (e); *9-variable 3-factor with cross-loadings; ***------------------------------------------------------------***; **input data; data holz; infile 'c:\data\holz9.dat'; input v1-v9; run; ***------------------------------------------------------------***; options linesize=100; proc iml; reset noname; ***------------------------------------------------------------***; *Linearly smooth the 5th quantile of a chi-square distribution using the lower 10% order statistics, and smooth 95th quantile of a chi-square distribution using the upper 10% order statistics; start smooth(n, n_b,x, y, coef, hquant_5, hquant_95); n_c=nrow(coef); *x is the population quantile; *y is the bootstrap quantile; *y=a+b*x+e; n10=n*.1; n91=n-n10+1; n_b91=n_b-n91+1; x10=x[1:n10]; y10=y[1:n10]; x90=x[n91:n_b]; *top 10% treating non-convergence as on the top; y90=y[n91:n_b]; xmat10=j(n10,1,1)||x10; xmat90=j(n_b91,1,1)||x90; xmat10_t=xmat10`; xmat90_t=xmat90`; hbeta10=inv(xmat10_t*xmat10)*xmat10_t*y10; hbeta90=inv(xmat90_t*xmat90)*xmat90_t*y90; hquant_5=hbeta10[1]*j(n_c,1,1)+coef[,1]*hbeta10[2]; hquant_95=hbeta90[1]*j(n_c,1,1)+coef[,2]*hbeta90[2]; finish; *====================================================; *The subroutine evaluate the 5 formulas of x_05 and x_95 as in Tables 4 and 5 of the article; start pred(N_r,df,f1_N5,f2_N5,f3_N5,f4_C5,f5_N5, f1_C95,f2_C95,f3_N95,f4_N95,f5_N95); N=N_r; *predicting 5% quantile; f1_N5=-1.7758+0.074123*log(df) -0.0026038*log(N)*log(df)*log(df) +(1.6359E-4)*log(N)*log(N)*log(df)*(df**(1/3)); f2_N5=-1.6971+.38677*log(df)/log(N) +(2.7820E-5)*(df**(3/2))/(N**(1/6)) -.14667*(df**(1/3))/(N**(1/6)); f3_N5=-1.7886+0.084029*log(df) -0.0032594*log(N)*log(df)*log(df) +(2.2843E-4)*log(N)*log(N)*log(df)*(df**(1/3)) -(2.5002E-11)*N*log(N)*df*df; f4_C5=-74.138+74.773*(df**(1/3))-24.526*(df**(1/6))*log(df) +.40840*df*log(df)-.64950*(df**(7/6))+(1.8737E-5)*df*df +.75171*(df**(2/3))/(N**(5/6))+3.0098*(df**(7/6))/(N**(5/3)) -6.0023*(df**(4/3))/(N*N); f5_N5=-1.5934-.0010345*(N**(5/6))/((df**(1/3))*log(df)) -.031556*log(N)*log(N)/log(df) +.045330*(N**(1/6))*log(N)/log(df) +.010637*(N**(1/3))*log(N)/((df**(1/6))*log(df)) +(7.2387E-8)*(N**(11/6))/(df*log(df)); *predicting 95% quantile; f1_C95=4.3385-21.422/(N*log(N))-33.654*(df**(1/3)) +58.396*sqrt(df)-27.459*df+9.0968*(df**(5/6))*log(df) +2.2968*(df**(7/6))-(1.1549E-4)*df*df -2.0644*sqrt(df)/(sqrt(N)*log(N)); f2_C95=4.2738-10.209/(N*log(N))-33.243*(df**(1/3)) +57.820*sqrt(df)-27.198*df+9.0159*(df**(5/6))*log(df) +2.2741*(df**(7/6))-(1.1393E-4)*df*df +(3.0050E-5)*df*df/((N**(1/3))*log(N)) -3.6995*sqrt(df)/(sqrt(N)*log(N)) +.68716*(log(df)*log(df))/(sqrt(N)*log(N)); f3_N95=1.6413-1.3614/(sqrt(N)*log(N)); f4_N95=1.6250-.86420/(N*log(N))+.0056051*log(df) -(4.0489E-4)*log(df)*log(df) -1.0297*(df**(1/6))/N+5.5384*log(df)*log(df)/(N**(5/3)) -10.076*(df**(5/6))*log(df)/(N**(11/6)) +11.465*(df**(3/2))/(N**(11/6)) -6.6793*(df**(5/3))/(N**(11/6)) +1.0516*((df/N)**(11/6)); f5_N95=.21864+1.0097*(N**(1/6))-.036540*log(N)*log(N) -(4.1371E-5)*(N**(2/3))*log(N)+.19742/((df**(5/6))* log(df)) -.57443/(df**(11/6))-.012198*log(N)/sqrt(df) +.0070765*(N**(1/3))/((df**(1/6))*log(df)) -(7.7312E-4)*(N**(1/3))*log(N)/(log(df)*log(df)) +.0037831*(N**(1/3))*log(N)/(df*df); finish; ***--------------------------------------------------------------------***; *subroutine to select the non-duplicated elements of a symmetric matrix A, Va=vech(A); ***--------------------------------------------------------------------***; start vech(A,Va); l=0; p=nrow(A); ps=p*(p+1)/2; Va=j(ps,1,0); do i=1 to p; do j=i to p; l=l+1; Va[l]=A[j,i]; end; end; finish; ***--------------------------------------------------------------------***; * creates duplication matrix D_p as defined in Magnus&Neudecker; ***--------------------------------------------------------------------***; start DP(p, dup); Dup=j(p*p,p*(p+1)/2,0); count=0; do j=1 to p; do i=j to p; count=count+1; if i=j then do; Dup[(j-1)*p+j, count]=1; end; else do; Dup[(j-1)*p+i, count]=1; Dup[(i-1)*p+j, count]=1; end; end; end; finish; ***--------------------------------------------------------------------***; *subroutine to evaluate sigma0=sigma(theta0) and the Jacobian matrix (dsigma0/dtheta) for conducting Fisher-scoring algorithm, and the model sigma(theta) is the one in Example 1 of the article; start sigdsig(theta0,vsig0,sig0,vdsig0); lamb=j(9,3,0); lamb[1,1]=1.0; lamb[2,1]=theta0[1]; lamb[3,1]=theta0[2]; lamb[8,1]=theta0[3]; lamb[9,1]=theta0[4]; lamb[4,2]=1.0; lamb[5,2]=theta0[5]; lamb[6,2]=theta0[6]; lamb[7,3]=1.0; lamb[8,3]=theta0[7]; lamb[9,3]=theta0[8]; lamb_t=lamb`; phi=j(3,3,0); phi[1,1]=theta0[9]; phi[1,2]=theta0[10]; phi[2,1]=theta0[10]; phi[3,1]=0.0; phi[1,3]=0.0; phi[2,2]=theta0[11]; phi[3,2]=theta0[12]; phi[2,3]=theta0[12]; phi[3,3]=theta0[13]; psi=theta0[14:22]; *computing sigma based on structured parameters; sig0=lamb*phi*lamb_t+diag(psi); run vech(sig0,vsig0); *the following is to evaluate the Jabobian matrix (dsigma0/dtheta); *dlamb is the derivative with Lambda; phl=phi*lamb_t; lph=lamb*phi; dlamb=j(45,8,0); do j=1 to 2; tt=j(9,3,0); tt[j+1,1]=1; ttt=tt*phl+lph*(tt`); run vech(ttt,tttt); dlamb[,j]=tttt; end; tt=j(9,3,0); tt[8,1]=1; ttt=tt*phl+lph*(tt`); run vech(ttt,tttt); dlamb[,3]=tttt; tt=j(9,3,0); tt[9,1]=1; ttt=tt*phl+lph*(tt`); run vech(ttt,tttt); dlamb[,4]=tttt; do j=5 to 6; tt=j(9,3,0); tt[j,2]=1; ttt=tt*phl+lph*(tt`); run vech(ttt,tttt); dlamb[,j]=tttt; end; do j=7 to 8; tt=j(9,3,0); tt[j+1,3]=1; ttt=tt*phl+lph*(tt`); run vech(ttt,tttt); dlamb[,j]=tttt; end; *dphi is the derivative with phi; dphi=j(45, 5, 0); tt=j(3,3,0); tt[1,1]=1; ttt=lamb*tt*lamb_t; run vech(ttt,tttt); dphi[,1]=tttt; tt=j(3,3,0); tt[1,2]=1; tt[2,1]=1; ttt=lamb*tt*lamb_t; run vech(ttt,tttt); dphi[,2]=tttt; tt=j(3,3,0); tt[2,2]=1; ttt=lamb*tt*lamb_t; run vech(ttt,tttt); dphi[,3]=tttt; tt=j(3,3,0); tt[2,3]=1; tt[3,2]=1; ttt=lamb*tt*lamb_t; run vech(ttt,tttt); dphi[,4]=tttt; tt=j(3,3,0); tt[3,3]=1; ttt=lamb*tt*lamb_t; run vech(ttt,tttt); dphi[,5]=tttt; *dpsi is the derivative with psi; dpsi=j(45,9,0); do j=1 to 9; tt=j(9,9,0); tt[j,j]=1; run vech(tt,vtt); dpsi[,j]=vtt; end; vdsig0=dlamb||dphi||dpsi; finish; *-------------------------------------------------------; *Estimating the structural model using Fisher-scoring algorithm; start Est(n, p, scov, theta0, Dup, Sig0, T_ml, div); ep=.00001; *ep=E-5 is the convergence criterion; ep1=.00000000000000000001; *ep1=E-20, samples having smallest eigenvalue of the information matrix smaller than ep1 are discarded; ps=p*(p+1)/2; q=nrow(theta0); df=ps-q; run vech(scov,vscov); dup_t=dup`; it_max=100; ***** Fisher-scoring begins here; delta=1; div=1; do i=1 to it_max while (delta>ep); run sigdsig(theta0,vsig0,Sig0,vdsig); sig_in=inv(Sig0); ** weight given by normal theory ; weight=0.5*dup_t*(sig_in@sig_in)*dup; vdsig_t=vdsig`; dsw=vdsig_t*weight; dwd=dsw*vdsig; ddval=eigval(dwd); if ddval[q]>ep1 then do; dwd_in=inv(dwd); res=vscov-vsig0; dtheta=dwd_in*dsw*res; theta0=theta0+dtheta; delta=max(abs(dtheta)); end; else do; i=it_max+1; end; end; if iT_b[j] then do; tt=T_b[i]; T_b[i]=T_b[j]; T_b[j]=tt; end; end; end; finish; **--------------------------------------------------------------------**; *obtain the lower 5% critical value under Sigma_a using bootstrap; start c_alpha(N_r,a,dup,x_c,Scov,Scov12_in, theta_0,Sig0,seed, q_vec1,q_vec2,coef_norm,qc_5); Sig_a=a*Scov+(1-a)*Sig0; call eigen(s_aval,s_avec,Sig_a); sig12_a=s_avec*diag(sqrt(s_aval))*(s_avec`); x0=x_c*(Scov12_in*sig12_a); **Obtain critical value of the statistic under null hypothesis; run bootstr(x0, theta_0, dup, seed, N_r, n_conv, T_b,n_div); *H_0: ep>.05, rejection occurs when T_ml<=c_a(epsilon=.05), where c_a is the critical value of (T_ml|epsilon=.05); call sort(T_b, 1); ncrit5=int(.05*N_r+.5); *(N_r=n_conv+n_div); ncrit95=int(.95*N_r+.5); sc_5=T_b[ncrit5]; sc_95=T_b[ncrit95]; *----------------------; *smoothing critical value; y=T_b; n_b=n_conv; run smooth(N_r,n_b,q_vec1, y, coef_norm,hquant_n5,hquant_n95); run smooth(N_r,n_b,q_vec2, y, coef_norm, hquant_c5,hquant_c95); qc_5=n_b||sc_5||(hquant_n5[1:3]`)||hquant_c5[4]||hquant_n5[5]; qc_95=n_b||sc_95||(hquant_c95[1:2]`)||(hquant_N95[3:5]`); finish; ***--------------------------------------------------------------------***; *** The following is the main program; ***--------------------------------------------------------------------***; start main; print "--------------------------------------------------------------------"; print "Data is from Holzinger and Swinford (1939)"; print "rescaled to have SD less than 2"; print "9-variables, 3-factor, loadings fixed at 1.0"; print "Variable 8 and 9 also load on factor 1"; print "---------------------------------------------------------"; ** 26 variables but we only use 9-variable out of them; use holz; read all var _num_ into xmatr; x=xmatr; n=nrow(x); p=ncol(x); ps=p*(p+1)/2; *resclae data by sc to make SD less than 2; sc={6.0 4.0 8.0 3.0 4.0 7.0 23.0 20.0 36.0}; rsc=diag(j(1,p,1)/sc); x=x*rsc; Scov=x`*(i(n)-j(n,n,1)/n)*x/(n-1); *starting value using MLE; theta_00={ 0.589 0.807 0.336 0.655 1.044 0.973 0.851 0.544 0.650 0.375 0.785 0.115 0.632 0.495 0.828 0.561 0.295 0.342 0.367 0.334 0.352 0.408}; theta_00=theta_00`; q=nrow(theta_00); df=ps-q; theta_0=theta_00; run Dp(p,dup); *Estimate the model by Normal-distribution ML; run Est(n, p, Scov, theta_0, Dup, Sig0, T_ml, div); if div=0 then do; *converged; sig_0=sig0; print "Model chi-square T_ml=" T_ml; end; N_r=500; *number of bootstrap replications; print "sample size=" n; print "bootstrap rep=" N_r; *---------------; *compute the values of x_05 and x_95 as functions of N_r and df according to the formulas in Tables 4 and 5; run pred(N_r,df,f1_N5,f2_N5,f3_N5,f4_C5,f5_N5, f1_C95,f2_C95,f3_N95,f4_N95,f5_N95); coef_norm= (f1_N5//f2_N5//f3_N5//f4_C5//f5_N5)|| (f1_C95//f2_C95//f3_N95//f4_N95//f5_N95); *For smoothing critical value; q_vec1=j(n_r,1,0); q_vec2=j(n_r,1,0); do i=1 to n_r; p_i=i/(n_r+1); q1_i=probit(p_i); q_vec1[i]=q1_i; q2_i=cinv(p_i,df); q_vec2[i]=q2_i; end; seed=1111111111; print "-----------------seed=1111111111-----------------"; ** Standardize data to satisfy the null model Sigma(theta); barx=j(1,n,1)*x/n; x_c=x-j(n,1,1)*barx; call eigen(sval,svec,Scov); Scov12_in=svec*diag(j(p,1,1)/sqrt(sval))*svec`; *the Sigma^{-1/2}; *1 start with a=0, T_ml will be greater than the c_alpha(a=0), c_alpha(a=0)c_alpha increase by .001; *if T_ml is smaller than c_alpha0 then ep_0 is too large, decrease a by .1; *if T_ml is greater than c_alpha1 then ep_0 is too small, increase a; *--------------------------------; *The following code uses the bootstrap approach to solve equation (8) as described in the section "Bootstrap Approach to Equivalence Testing for Mean and Covariance Structural Models"; result_1=j(10,9,0); *contain the results when h changes by .1; result_2=j(10,9,0); *contain the results when h changes by .01; result_3=j(10,9,0); *contain the results when h changes by .001; result_4=j(10,9,0); *contain the results when h changes by .0001; hat_c5=0; a=0; N_c1=0; *qc_5=n_b||sc_5||(hquant_n5[1:3]`)||hquant_c5[4]||hquant_n5[5]; do i=1 to 10 while (hat_c5T_ml); a=a-.01; theta_0=theta_00; run c_alpha(N_r,a,dup,x_c,Scov,Scov12_in,theta_0,Sig0,seed, q_vec1,q_vec2,coef_norm,qc_5); result_2[i,]=i||a||qc_5; N_c2=N_c2+1; hat_c5=qc_5[3]; end; result_2=result_2[1:N_c2,]; N_c3=0; do i=1 to 10 while (hat_c5T_ml); a=a-.0001; theta_0=theta_00; run c_alpha(N_r,a,dup,x_c,Scov,Scov12_in,theta_0,Sig0,seed, q_vec1,q_vec2,coef_norm,qc_5); result_4[i,]=i||a||qc_5; N_c4=N_c4+1; hat_c5=qc_5[3]; end; result_4=result_4[1:N_c4,]; *qc_5=n_b||sc_5||(hquant_n5[1:3]`)||hquant_c5[4]||hquant_n5[5]; print "----------------------------------------"; print "i1, a, qc_5(n_conv,sc_5,fn5_1,fn5_2,fn5_3,fc5_4,fn5_5)"; print result_1; print "----------------------------------------"; print "i2, a, qc_5(n_conv,sc_5,fn5_1,fn5_2,fn5_3,fc5_4,fn5_5)"; print result_2; print "----------------------------------------"; print "i3, a, qc_5(n_conv,sc_5,fn5_1,fn5_2,fn5_3,fc5_4,fn5_5)"; print result_3; print "----------------------------------------"; print "i4, a, qc_5(n_conv,sc_5,fn5_1,fn5_2,fn5_3,fc5_4,fn5_5)"; print result_4; print "----------------------------------------"; a_1=result_1[N_c1,2]; Sig_a1=a_1*Scov+(1-a_1)*Sig_0; theta_0=theta_00; run Est(n, p, Sig_a1, theta_0, Dup, Sig0, T_ml, div); if div=0 then do; F_mla=T_ml/(N-1); RMSEA_a=sqrt(F_mla/df); print "a_1, T_mla, F_mla, RMSEA_a=" a_1 T_ml F_mla RMSEA_a; end; a_2=result_2[N_c2,2]; Sig_a2=a_2*Scov+(1-a_2)*Sig_0; theta_0=theta_00; run Est(n, p, Sig_a2, theta_0, Dup, Sig0, T_ml, div); if div=0 then do; F_mla=T_ml/(N-1); RMSEA_a=sqrt(F_mla/df); print "a_2, T_mla, F_mla, RMSEA_a=" a_2 T_ml F_mla RMSEA_a; end; a_3=result_3[N_c3,2]; Sig_a3=a_3*Scov+(1-a_3)*Sig_0; theta_0=theta_00; run Est(n, p, Sig_a3, theta_0, Dup, Sig0, T_ml, div); if div=0 then do; F_mla=T_ml/(N-1); RMSEA_a=sqrt(F_mla/df); print "a_3, T_mla, F_mla, RMSEA_a=" a_3 T_ml F_mla RMSEA_a; end; a_4=result_4[N_c4,2]; Sig_a4=a_4*Scov+(1-a_4)*Sig_0; theta_0=theta_00; run Est(n, p, Sig_a4, theta_0, Dup, Sig0, T_ml, div); if div=0 then do; F_mla=T_ml/(N-1); RMSEA_a=sqrt(F_mla/df); print "a_4, T_mla, F_mla, RMSEA_a=" a_4 T_ml F_mla RMSEA_a; end; finish; run main;