###------------------------------------------------------------### # This program is to compute Example 1 in the article # "Smoothed Quantiles for Chi-square Type Test Statistics with Applications" by # Ke-Hai Yuan, Brenna Gomer, and Katerina M. Marcoulides; # data are from Hozlinger & Swineford (1939); # model is from Joreskog (1969) Table 1 (e); # 9-variable 3-factor with cross-loadings; #====================== Table of Contents =======================# # 1. Libraries and Functions # 2. Functions # 3. Load data and run the functions #================================================================# ######================== 1. Libraries =====================###### library(matrixcalc) ; library(rsem) ######================== 2. Functions =====================###### ###------------------------------------------------------------### # Linearly smooth the 5th quantile of a chi-square # distribution using the lower 10% order statistics, # and smooth 95th quantile of a chi-square # distribution using the upper 10% order statistics # x is the population quantile # y is the bootstrap quantile # y=a+b*x+e smooth = function(n, n_b, x, y, coef){ n_c = nrow(coef) n10 = n*.1 n91 = n - n10 + 1 n_b91 = n_b - n91 + 1 x10 = x[1:n10] y10 = y[1:n10] y90 = y[y>=quantile(y, c(.9))] x90 = x[x>=quantile(x, c(.9))] # top 10% treating non-convergence as on the top # y90 = y[n91:n_b] xmat10 = cbind(matrix(1, nrow = n10, ncol = 1), x10) xmat90 = cbind(matrix(1, nrow = length(x90), ncol = 1), x90) xmat10_t = t(xmat10) ; xmat90_t = t(xmat90) hbeta10 = ginv(xmat10_t%*%xmat10)%*%xmat10_t%*%y10 hbeta90 = ginv(xmat90_t%*%xmat90)%*%xmat90_t%*%y90 hquant_5 = hbeta10[1,1]*matrix(1, nrow = n_c, ncol = 1) + coef[,1]*hbeta10[2,1] hquant_95 = hbeta90[1,1]*matrix(1, nrow = n_c, ncol = 1) + coef[,2]*hbeta90[2,1] return(list(hquant5 = hquant_5, hquant95 = hquant_95)) } ###====================================================### # This function evaluates the 5 formulas of x_05 and x_95 # as in Tables 4 and 5 of the article pred = function(N_r,df){ N = N_r # predicting 5% quantile f1_N5=-1.7758 + 0.074123*log(df) - 0.0026038*log(N)*log(df)*log(df) + (1.6359E-4)*log(N)*log(N)*log(df)*(df^(1/3)) f2_N5 = -1.6971 + .38677*log(df)/log(N) + (2.7820E-5)*(df^(3/2))/(N^(1/6)) - .14667*(df^(1/3))/(N^(1/6)) f3_N5 = -1.7886 + 0.084029*log(df) - 0.0032594*log(N)*log(df)*log(df) + (2.2843E-4)*log(N)*log(N)*log(df)*(df^(1/3)) - (2.5002E-11)*N*log(N)*df*df f4_C5 = -74.138 + 74.773*(df^(1/3)) - 24.526*(df^(1/6))*log(df) + .40840*df*log(df) - .64950*(df^(7/6)) + (1.8737E-5)*df*df + .75171*(df^(2/3))/(N^(5/6)) + 3.0098*(df^(7/6))/(N^(5/3)) - 6.0023*(df^(4/3))/(N*N) f5_N5=-1.5934 - .0010345*(N^(5/6))/(df^(1/3)*log(df)) - .031556*log(N)*log(N)/log(df) + .045330*(N^(1/6))*log(N)/log(df) + .010637*(N^(1/3))*log(N)/(df^(1/6)*log(df)) + (7.2387E-8)*(N^(11/6))/(df*log(df)) # predicting 95% quantile f1_C95 = 4.3385 - 21.422/(N*log(N)) - 33.654*(df^(1/3)) + 58.396*sqrt(df) - 27.459*df + 9.0968*(df^(5/6))*log(df) + 2.2968*(df^(7/6)) - (1.1549E-4)*df*df - 2.0644*sqrt(df)/(sqrt(N)*log(N)) f2_C95 = 4.2738 - 10.209/(N*log(N)) - 33.243*(df^(1/3)) + 57.820*sqrt(df) - 27.198*df+9.0159*(df^(5/6))*log(df) + 2.2741*(df^(7/6)) - (1.1393E-4)*df*df + (3.0050E-5)*df*df/((N^(1/3))*log(N)) - 3.6995*sqrt(df)/(sqrt(N)*log(N)) + .68716*(log(df)*log(df))/(sqrt(N)*log(N)) f3_N95 = 1.6413 - 1.3614/(sqrt(N)*log(N)) f4_N95 = 1.6250 - .86420/(N*log(N)) + .0056051*log(df) - (4.0489E-4)*log(df)*log(df) - 1.0297*(df^(1/6))/N + 5.5384*log(df)*log(df)/(N^(5/3)) - 10.076*(df^(5/6))*log(df)/(N^(11/6)) + 11.465*(df^(3/2))/(N^(11/6)) - 6.6793*(df^(5/3))/(N^(11/6)) + 1.0516*(df^(11/6))/(N^(11/6)) f5_N95 = .21864 + 1.0097*(N^(1/6)) - .036540*log(N)*log(N) - (4.1371E-5)*(N^(2/3))*log(N) + .19742/((df^(5/6))*log(df)) - .57443/(df^(11/6)) - .012198*log(N)/sqrt(df) + .0070765*(N^(1/3))/((df^(1/6))*log(df)) - (7.7312E-4)*(N^(1/3))*log(N)/(log(df)*log(df)) + .0037831*(N^(1/3))*log(N)/(df*df) res = cbind(c(f1_N5,f2_N5,f3_N5,f4_C5,f5_N5), c(f1_C95,f2_C95,f3_N95,f4_N95,f5_N95)) rownames(res) = c('f1', 'f2', 'f3', 'f4', 'f5') ; colnames(res) = c('x_05', 'x_95') #f5 = c(f1_N5,f2_N5,f3_N5,f4_C5,f5_N5) #names(f5) = c('f1n5', 'f2n5', 'f3n5', 'f4c5', 'f5n5') #f95 = c(f1_C95,f2_C95,f3_N95,f4_N95,f5_N95) #names(f95) = c('f1c95', 'f2c95', 'f3n95', 'f4n95', 'f5n95') #res = list(x_05 = f5, x_95 = f95) return(res) } ### convert theta into the format used in other functions reformat_theta = function(theta_0){ fl0 = list(theta_0[c(1:3, 8:9)], theta_0[4:6], theta_0[7:9]) phi = matrix(0, nrow = 3, ncol = 3) phi[1, 1] = theta_0[9] phi[1, 2] = phi[2, 1] = theta_0[10] phi[3, 1] = phi[1, 3] = 0 phi[2, 2] = theta_0[11] phi[3, 2] = phi[2, 3] = theta_0[12] phi[3, 3] = theta_0[13] phi[lower.tri(phi, diag = T)] psi = theta_0[14:22] theta0 = list(fl0, phi[lower.tri(phi, diag = T)], theta_0[14:22]) names(theta0) = c("Loadings", "Factor.Corr", "Error.Var") return(theta0) } ###--------------------------------------------------------------------### # function to evaluate sigma0=sigma(theta0) # and the Jacobian matrix (dsigma0/dtheta) for conducting # Fisher-scoring algorithm, and the model sigma(theta) is # the one in Example 1 of the article sigdsig = function(theta0, group, p, m = 3){ # Derivative of lav_matrix_vech(Sigma), namely, sigma_dot #theta0 is a list of 3: factor loadings (sublists for each group), factor correlations, and error variances. # group should be a list of questions/manifest variables for each factor. names(group) = 1:m # factor membership label ##factor loadings Lambda = matrix(0, p, m) for (j in 1 : m){ Lambda[group[[j]], j] = theta0[[1]][[j]] } #this line is to add extra loadings # Lambda[cbind((1:m)*p/m, c(2:m,1))] = abc ##factor correlations Phi = diag(1, m) Phi[lower.tri(Phi, diag=T)] = theta0[[2]] #column-wise Phi = Phi + t(Phi) - diag(diag(Phi), m) ##error variances Psi = diag(theta0[[3]]) ##True poulation covariance matrix Sigma0 = Lambda%*%Phi%*%t(Lambda) + Psi vsig0 = lav_matrix_vech(Sigma0) lph = Lambda%*%Phi phl = Phi%*%t(Lambda) dsigma = array(0, dim = c(p, p, length(unlist(theta0)))) #1st-order derivative of Sigma d.vechS = matrix(0, length(vsig0), length(unlist(theta0))) #1st-order derivative of lav_matrix_vech(Sigma) ##derivative for Lambda ind = list() for(i in 1:length(group)){ ind[[i]] = rep(i, length(group[[i]])) } ind = unlist(ind) for(j in 1:length(group)){ for (i in 1 : length(group[[j]])){ d1 = matrix(0, p, m) d1[group[[j]][i], j] = 1 d1t = d1%*%phl+lph%*%t(d1) k = which(ind==j)[i] dsigma[,,k] = d1t # a p by p matrix d.vechS[,k] = lav_matrix_vech(d1t) } } ##derivative for Phi for (i in 1 : (m*(m+1)/2)){ d1 = diag(0, m) d1[lower.tri(d1, diag=T)][i] = 1 d1 = d1 + t(d1) - diag(diag(d1), m) d1t = Lambda%*%d1%*%t(Lambda) dsigma[,,(length(unlist(group))+i)] = d1t d.vechS[,(length(unlist(group))+i)] = lav_matrix_vech(d1t) } ##derivative for psi for (i in 1 : p){ d1 = diag(0, p) d1[i,i] = 1 dsigma[,,(length(unlist(theta0)) - p + i)] = d1 d.vechS[,(length(unlist(theta0)) - p + i)] = lav_matrix_vech(d1) } return(list(Sigma0 = Sigma0, vsig0 = vsig0, dsigma = dsigma, d.vechS = d.vechS)) } ###------------------------------------------------------------### #Estimating the structural model using Fisher-scoring algorithm; Fisher = function(data, m=3, S=NULL, group, theta0 = NULL, fix.first.loadings = T, iterations = 1000){ # Fisher scoring for computing T_ml and T_rml # group should be a list of questions/manifest variables for each factor. ## S=NULL:if S is provided (e.g. M-estimates), theta_hat and statistics will change ## Note!!! Double check if mu.M or regular mu should be used for Gamma in Trml ## fix.sd: fix factor variances in estimation; otherwise, fix 1st loading of each factor data = scale(data, center = TRUE, scale = TRUE) data = as.matrix(data) n = nrow(data) p = ncol(data) load1 = list() # first loading for each factor for(i in 1:length(group)){ load1[[i]] = group[[i]] load1[[i]][1] = NA } load1 = unlist(load1) load1 = which(is.na(load1)) if (is.null(S)){ #regular S is used S = cov(data)*(n-1)/n } vs = lav_matrix_vech(S) if(is.null(theta0)){ theta0 = initial(S, m, group, fix.first.loadings) #initial values } cross.paths = length(names(unlist(theta0$Loadings))) - p if(cross.paths !=0){ df = p*(p+1)/2 - p*2-m*(m-1)/2 - cross.paths + m } if(cross.paths == 0){ df = p*(p+1)/2 - p*2-m*(m-1)/2 } dup = rsem.DP(p) #duplication matrix ep = 1E-7 ep1 = 1E-15 itr = iterations # Iteration limit idiv = 0 # nonconvergence due to bad fit cor.ind = rep(1, m) corr.ind = rep(length(unlist(group)) + 1, m) for (k in 2:m){ cor.ind[k] = cor.ind[k-1] + m + 2 - k # in theta0 as list corr.ind[k] = corr.ind[k-1] + m + 2 - k # unlisted theta0 } for (i in 1 : itr+1){ if (i>itr){idiv = idiv+1; break} if (fix.first.loadings == FALSE){ theta0[[2]][cor.ind] = 1 deriv = sigdsig(theta0, group, p, m) # score function d.vechS = deriv$d.vechS[,-corr.ind] # sigma dot } else { # default in lavaan for(k in 1:length(group)){ theta0[[1]][[k]][1] = 1 # fixes first loadings to 1 } deriv = sigdsig(theta0, group, p, m) # score function d.vechS = deriv$d.vechS[,-load1] # sigma dot } Sigma = deriv$Sigma0 # model-implied cov matrix at theta0 Sigin = ginv(Sigma) # model-implied cov matrix inverse krok = Sigin%x%Sigin #krokner product V = 0.5*t(dup)%*%krok%*%dup # W MATRIX in the article dswe = t(d.vechS)%*%V #outer term dwd = dswe%*%d.vechS #negative second order derivative ddval = eigen(dwd)$values #if (tail(ddval,1).05, rejection occurs when T_ml<=c_a(epsilon=.05), # where c_a is the critical value of (T_ml|epsilon=.05) T_b = sort(T_b) temp = quantile(T_b, probs = c(.05, .95)) sc_5 = temp[1] sc_95 = temp[2] # smoothing critical value y = T_b n_b = n_conv # q_vec1 is normal quantiles, q_vec2 is chisq quantiles # coef_norm are values of x_05 and x_95 (Eq 5) # smooth gives c_alpha (Eq 4) temp = smooth(n = N_r, n_b = n_b, x = q_vec1, y = y, coef = coef_norm) hquant_n5 = temp$hquant5 ; hquant_n95 = temp$hquant95 temp = smooth(n = N_r, n_b = n_b, x = q_vec2, y = y, coef = coef_norm) hquant_c5 = temp$hquant5 ; hquant_c95 = temp$hquant95 qc_5 = cbind(n_b, sc_5, t(hquant_n5[1:3,]), hquant_c5[4,], hquant_n5[5,]) colnames(qc_5) = c('n_b', 'sc_5', 'fn5_1', 'fn5_2', 'fn5_3', 'fc5_4', 'fn5_5') qc_95 = cbind(n_b, sc_95, t(hquant_c95[1:2,]), t(hquant_n95[3:5,])) return(qc_5) } ###------------------------------------------------------------### # The following function goes through the main program and gives final results ###------------------------------------------------------------### main = function(holz){ x = as.matrix(holz) n = nrow(x) ; p = ncol(x) ; ps = p*(p+1)/2 group = list(g1 = c(1:3, 8,9), g2 = c(4:6), g3 = c(7:9)) all.res = list() ### rescale data by sc to make SD less than 2 sc = c(6.0, 4.0, 8.0, 3.0, 4.0, 7.0, 23.0, 20.0, 36.0) rsc = diag(1/sc) x = x%*%rsc Scov = cov(x) ###starting value using MLE theta_00=c(0.589, 0.807, 0.336, 0.655, 1.044, 0.973, 0.851, 0.544, 0.650, 0.375, 0.785, 0.115, 0.632, 0.495, 0.828, 0.561, 0.295, 0.342, 0.367, 0.334, 0.352, 0.408) theta_00 = t(t(theta_00)) q = nrow(theta_00) df = ps - q theta_0 = theta_00 N_r = 500 # number of bootstrap replications coef_norm = pred(N_r = 500, df = df) Dup = rsem.DP(p) # Estimate the model by Normal-distribution ML FS.theta = reformat_theta(theta_0) res = Fisher(data = as.data.frame(x), m = 3, group = group, theta0 = FS.theta) Sig0 = res$Sigma0 ; T_ml = res$stat[,'T_ml'] ; div = res$stat[, 'noncvg'] if(div==0){ Sig_0 = Sig0 } all.res[[1]] = c(T_ml, n, N_r) names(all.res[[1]]) = c('Model chi-square T_ml', 'sample size', 'bootstrap rep') # For smoothing critical value q_vec1 = q_vec2 = matrix(0, nrow = N_r, ncol = 1) for(i in 1:N_r){ p_i = i/(N_r+1) q_vec1[i] = qnorm(p_i) q_vec2[i] = qchisq(p_i, df) } seed=1111111111 ## Standardize data to satisfy the null model Sigma(theta) x_c = scale(x, center = T, scale = F) sval = eigen(Scov)$values svec = eigen(Scov)$vectors Scov12_in = svec%*%diag(1/sqrt(sval))%*%t(svec) # the Sigma^{-1/2} # 1 start with a=0, T_ml will be greater than the c_alpha(a=0), # c_alpha(a=0)c_alpha increase by .001 # if T_ml is smaller than c_alpha0 then ep_0 is too large, # decrease a by .1 # if T_ml is greater than c_alpha1 then ep_0 is too small, increase a #--------------------------------# # The following code uses the bootstrap approach to solve # equation (8) as described in the section # "Bootstrap Approach to Equivalence Testing # for Mean and Covariance Structural Models" result_1 = matrix(0, nrow = 10, ncol = 9) # contain the results when h changes by .1 result_2 = matrix(0, nrow = 10, ncol = 9) # contain the results when h changes by .01 result_3 = matrix(0, nrow = 10, ncol = 9) # contain the results when h changes by .001 result_4 = matrix(0, nrow = 10, ncol = 9) # contain the results when h changes by .0001 hat_c5 = 0 a = 0 ; N_c1 = 0 # qc_5=n_b||sc_5||(hquant_n5[1:3]`)||hquant_c5[4]||hquant_n5[5] i = 1 while((hat_c5T_ml) & (i<=10)){ a = a - .01 theta_0 = theta_00 qc_5 = c_alpha(N_r = N_r, a = a, dup = Dup, x_c = x_c, Scov = Scov, Scov12_in = Scov12_in, theta_0 = theta_0, Sig0 = Sig0, seed = seed, q_vec1 = q_vec1, q_vec2 = q_vec2, coef_norm = coef_norm, group = group) result_2[i,] = c(i, a, qc_5) N_c2 = N_c2 + 1 hat_c5 = qc_5[3] i = i + 1 } result_2 = result_2[1:N_c2,] N_c3 = 0 ; i = 1 while((hat_c5T_ml) & (i<=10)){ a = a - .0001 theta_0 = theta_00 qc_5 = c_alpha(N_r = N_r, a = a, dup = Dup, x_c = x_c, Scov = Scov, Scov12_in = Scov12_in, theta_0 = theta_0, Sig0 = Sig0, seed = seed, q_vec1 = q_vec1, q_vec2 = q_vec2, coef_norm = coef_norm, group = group) result_4[i,] = c(i, a, qc_5) N_c4 = N_c4 + 1 hat_c5 = qc_5[3] i = i + 1 } result_4 = result_4[1:N_c4,] gen.names = c('a', 'n_conv', 'sc_5', 'fn5_1', 'fn5_2', 'fn5_3', 'fc5_4', 'fn5_5') colnames(result_1) = c('i1', gen.names) colnames(result_2) = c('i2', gen.names) colnames(result_3) = c('i3', gen.names) colnames(result_4) = c('i4', gen.names) # qc_5=n_b||sc_5||(hquant_n5[1:3]`)||hquant_c5[4]||hquant_n5[5] all.res[[2]] = result_1 ; names(all.res[2]) = 'result1' all.res[[3]] = result_2 ; names(all.res[3]) = 'result2' all.res[[4]] = result_3 ; names(all.res[4]) = 'result3' all.res[[5]] = result_4 ; names(all.res[5]) = 'result4' names(all.res)[2:5] = paste('result', 1:4, sep = '') names(all.res)[1] = 'Model Estimated by NML' a_1 = result_1[N_c1,2] Sig_a1 = a_1*Scov + (1-a_1)*Sig_0 theta_0 = theta_00 FS.theta = reformat_theta(theta_0) res = Fisher(data = as.data.frame(x), S = Sig_a1, m = 3, group = group, theta0 = FS.theta) Sig0 = res$Sigma0 ; T_ml = res$stat[,'T_ml'] ; div = res$stat[, 'noncvg'] if(div==0){ F_mla = T_ml/(n-1) RMSEA_a = sqrt(F_mla/df) temp = c(a_1, T_ml, F_mla, RMSEA_a) names(temp) = c('a_1', 'T_mla', 'F_mla', 'RMSEA_a') all.res[[6]] = temp } if(div==1){ all.res[[6]] = 'a_1 did not converge' } a_2 = result_2[N_c2,2] Sig_a2 = a_2*Scov + (1-a_2)*Sig_0; theta_0 = theta_00 FS.theta = reformat_theta(theta_0) res = Fisher(data = as.data.frame(x), S = Sig_a2, m = 3, group = group, theta0 = FS.theta) Sig0 = res$Sigma0 ; T_ml = res$stat[,'T_ml'] ; div = res$stat[, 'noncvg'] if(div==0){ F_mla = T_ml/(n-1) RMSEA_a = sqrt(F_mla/df) temp = c(a_2, T_ml, F_mla, RMSEA_a) names(temp) = c('a_2', 'T_mla', 'F_mla', 'RMSEA_a') all.res[[7]] = temp } if(div==1){ all.res[[7]] = 'a_2 did not converge' } a_3 = result_3[N_c3,2] Sig_a3 = a_3*Scov+(1-a_3)*Sig_0; theta_0 = theta_00 FS.theta = reformat_theta(theta_0) res = Fisher(data = as.data.frame(x), S = Sig_a3, m = 3, group = group, theta0 = FS.theta) Sig0 = res$Sigma0 ; T_ml = res$stat[,'T_ml'] ; div = res$stat[, 'noncvg'] if(div==0){ F_mla = T_ml/(n-1) RMSEA_a = sqrt(F_mla/df) temp = c(a_3, T_ml, F_mla, RMSEA_a) names(temp) = c('a_3', 'T_mla', 'F_mla', 'RMSEA_a') all.res[[8]] = temp } if(div==1){ all.res[[8]] = 'a_3 did not converge' } a_4 = result_4[N_c4,2] Sig_a4 = a_4*Scov+(1-a_4)*Sig_0; theta_0 = theta_00 FS.theta = reformat_theta(theta_0) res = Fisher(data = as.data.frame(x), S = Sig_a4, m = 3, group = group, theta0 = FS.theta) Sig0 = res$Sigma0 ; T_ml = res$stat[,'T_ml'] ; div = res$stat[, 'noncvg'] if(div==0){ F_mla = T_ml/(n-1) RMSEA_a = sqrt(F_mla/df) temp = c(a_4, T_ml, F_mla, RMSEA_a) names(temp) = c('a_4', 'T_mla', 'F_mla', 'RMSEA_a') all.res[[9]] = temp } if(div==1){ all.res[[9]] = 'a_4 did not converge' } names(all.res)[6:9] = paste('a', 1:4, sep = '') return(all.res) } ###------------------------------------------------------------### ######====== 3. Load data and run the main function =======###### #-------------------------------------------------------------------- # Data is from Holzinger and Swinford (1939) # rescaled to have SD less than 2 # 9-variables, 3-factor, loadings fixed at 1.0 # Variable 8 and 9 also load on factor 1 # --------------------------------------------------------- # 26 variables but we only use 9-variable out of them holz = read.table('holz9.dat') # Run the main function and get results in the object 'out' set.seed(1) out = main(holz)