***------------------------------------------------------------***; ** Data FROM Wheaton et al (1977) Sociological Methodology; *** The model is LISREL type model; *** All tests are based on normal theory estimates; ***------------------------------------------------------------***; ***------------------------------------------------------------***; options ls=75 nodate nonumber; options nodate; title; proc iml; reset noname; ***--------------------------------------------------------------------***; **** The following subroutine is to perform vech(.) function; ***--------------------------------------------------------------------***; start vech(A,Va); l=0; p=nrow(A); pstar=p*(p+1)/2; Va=j(pstar,1,0); do i=1 to p by 1; do j=i to p by 1; l=l+1; Va[l]=A[j,i]; end; end; finish; ***--------------------------------------------------------------------***; * creates duplication matrix as defined in Magnus&Neudecker*; ***--------------------------------------------------------------------***; start DP(p, dup); Dup=j(p*p,p*(p+1)/2,0); count=0; do j=1 to p; do i=j to p; count=count+1; if i=j then do; Dup[(j-1)*p+j, count]=1; end; else do; Dup[(j-1)*p+i, count]=1; Dup[(i-1)*p+j, count]=1; end; end; end; finish; ***--------------------------------------------------------------------***; **** The following subroutine is to evaluate sigma0 and (dsigma0/dtheta); ***--------------------------------------------------------------------***; start sigdsig_sem(p,theta0,vsig,Sigma,vdsig); p_x=2; p_y=4; ps=p*(p+1)/2; lamb_x=theta0[1:p_x]; lamb_xt=lamb_x`; lamb_y=j(p_y,2,0); lamb_y[1,1]=1.0; lamb_y[2,1]=theta0[3]; lamb_y[3,2]=1.0; lamb_y[4,2]=theta0[4]; lamb_yt=lamb_y`; gamma=theta0[5:6]; gamma_t=gamma`; **AB=I-B in LISREL notation; Beta=j(2,2,0); Beta[2,1]=theta0[7]; A=i(2); AB=A-Beta; phi=1.0; sig_zeta=diag(theta0[8:9]); psi_x=diag(theta0[10:11]); psi_y=diag(theta0[12:15]); *computing Sigma based on structured parameters; *computing Sigma based on structured parameters; ABin=inv(AB); ABin_t=ABin`; sigyy=lamb_y*ABin*(gamma*phi*gamma_t+sig_zeta)*ABin_t*lamb_yt+psi_y; sigxy=lamb_x*phi*gamma_t*ABin_t*lamb_yt; sigxx=lamb_x*phi*lamb_xt+psi_x; sigma=j(6,6,0); sigma[1:2,1:2]=sigxx; sigma[1:2,3:6]=sigxy; sigma[3:6,1:2]=sigxy`; sigma[3:6,3:6]=sigyy; run vech(sigma,vsig); ***** The following is to evaluate (dsigma0/dtheta); *dlamb_x is the derivative with Lamb_x; dlamb_x=j(ps,p_x,0); t1=phi*gamma_t*ABin_t*lamb_yt; phl=phi*lamb_xt; lph=lamb_x*phi; tyy=j(p_y,p_y,0); do j=1 to p_x; tt=j(p_x,1,0); tt[j]=1; txx=tt*phl+lph*(tt`); txy=tt*t1; ttt=(txx||txy)//(txy`||tyy); run vech(ttt,tttt); dlamb_x[,j]=tttt; end; *dlamb_y is the derivative with Lamb_y; dlamb_y=j(ps,2,0); t1=ABin*(gamma*phi*gamma_t+sig_zeta)*ABin_t; t2=lamb_x*phi*gamma_t*ABin_t; tly1=t1*lamb_yt; lyt1=lamb_y*t1; txx=j(p_x,p_x,0); j=1; tt=j(p_y,2,0); tt[j+1,1]=1; tyy=tt*tly1+lyt1*(tt`); txy=t2*(tt`); ttt=(txx||txy)//((txy`)||tyy); run vech(ttt,tttt); dlamb_y[,j]=tttt; j=2; tt=j(p_y,2,0); tt[j+2,2]=1; tyy=tt*tly1+lyt1*(tt`); txy=t2*(tt`); ttt=(txx||txy)//((txy`)||tyy); run vech(ttt,tttt); dlamb_y[,j]=tttt; *dgamma is the derivative with gamma; dgamma=j(ps,2,0); t1=lamb_y*ABin; t2=lamb_x*phi; txx=j(p_x,p_x,0); do j=1 to 2; tt=j(2,1,0); tt[j]=1; tyy=t1*(tt*phi*gamma_t+gamma*phi*(tt`))*(t1`); txy=t2*(tt`)*(t1`); ttt=(txx||txy)//((txy`)||tyy); run vech(ttt,tttt); dgamma[,j]=tttt; end; *dbeta is the derivative with beta; t1=ABin*(gamma*phi*gamma_t+sig_zeta)*ABin_t; t2=lamb_x*phi*gamma_t*ABin_t; t3=lamb_y*ABin; txx=j(p_x,p_x,0); tt=j(2,2,0); tt[2,1]=1; tyy=t3*tt*t1*lamb_yt+lamb_y*t1*(tt`)*(t3`); txy=t2*(tt`)*(t3`); ttt=(txx||txy)//((txy`)||tyy); run vech(ttt,tttt); dbeta=tttt; *dphi is the derivative with phi; *txx=lamb_x*lamb_xt; *t1=lamb_y*ABin*gamma; *tyy=t1*t1`; *txy=lamb_x*t1`; *ttt=(txx||txy)//(txy`||tyy); *run vech(ttt,tttt); *dphi=tttt; *dsig_zeta is the derivative with sig_zeta; dsig_zeta=j(ps,2,0); t1=lamb_y*ABin; txx=j(p_x,p_x,0); txy=j(p_x,p_y,0); do j=1 to 2; tt=j(2,2,0); tt[j,j]=1; tyy=t1*tt*t1`; ttt=(txx||txy)//(txy`||tyy); run vech(ttt,tttt); dsig_zeta[,j]=tttt; end; *dpsi_x is the derivative with psi_x; dpsi_x=j(ps,p_x,0); tyy=j(p_y,p_y,0); txy=j(p_x,p_y,0); do j=1 to p_x; tt=j(p_x,p_x,0); tt[j,j]=1; txx=tt; ttt=(txx||txy)//((txy`)||tyy); run vech(ttt,tttt); dpsi_x[,j]=tttt; end; *dpsi_y is the derivative with psi_y; dpsi_y=j(ps,p_y,0); txx=j(p_x,p_x,0); txy=j(p_x,p_y,0); do j=1 to p_y; tt=j(p_y,p_y,0); tt[j,j]=1; tyy=tt; ttt=(txx||txy)//((txy`)||tyy); run vech(ttt,tttt); dpsi_y[,j]=tttt; end; vdsig=dlamb_x||dlamb_y||dgamma||dbeta||dsig_zeta||dpsi_x||dpsi_y; finish; ***--------------------------------------------------------------------***; **Subroutine for parameter estimation by minimizing the F_ml function using Fisher-scoring method**; **coefficients of factor-scores are subsequently obtained; ***--------------------------------------------------------------------***; start ML_SEM(n, p, dup, theta0,scov, T_ml,htheta_z,coef_xt,coef_yt,n_div); **if idiv=1 then model does not converge; p_x=2; p_y=4; ep=.00001; **ep=E-5; ep1=.00000000000000000001; **ep1=E-20; ps=p*(p+1)/2; q=nrow(theta0); df=ps-q; dtheta=1; n_div=1; do jj=1 to 300 while (max(abs(dtheta))>ep); run sigdsig_sem(p,theta0,vsig0,Sigma0, vdsig); sig_in=inv(Sigma0); ** weight given by normal theory ; weight=0.5*dup`*(sig_in@sig_in)*dup; vdsig_t=vdsig`; dsw=vdsig_t*weight; dswds=dsw*vdsig; dwd_in=inv(dswds); cov_res=scov-sigma0; run vech(cov_res,vcov_res); dtheta=dwd_in*dsw*vcov_res; theta0=theta0+dtheta; end; if jj<300 then do; n_div=0; Ssig_in=Scov*sig_in; f_ml=trace(ssig_in)-log(det(ssig_in))-p; T_ml=(n-1)*f_ml; SE_hat=sqrt(vecdiag(dwd_in)/(n-1)); *SE_hat=sqrt(vecdiag(dwd_in)/n); z_score=theta0/SE_hat; htheta_z=theta0||SE_hat||z_score; *computation of factor-score; lamb_x=theta0[1:p_x]; lamb_xt=lamb_x`; lamb_y=j(p_y,2,0); lamb_y[1,1]=1.0; lamb_y[2,1]=theta0[3]; lamb_y[3,2]=1.0; lamb_y[4,2]=theta0[4]; lamb_yt=lamb_y`; psi_x=diag(theta0[10:11]); psi_y=diag(theta0[12:15]); psi_xin=inv(psi_x); psi_yin=inv(psi_y); coef_xt=inv(lamb_xt*psi_xin*lamb_x)*lamb_xt*psi_xin; coef_yt=inv(lamb_yt*psi_yin*lamb_y)*lamb_yt*psi_yin; end; finish; *--------------------------------------------; *LS regression using the sample covariance matrix; ******************************************************************; start regress(n,c_xt,c_yt,scov,reg_est, Rsq); *xy; p_x=2;p_y=4; p=p_x+p_y; scov=(n-1)*scov/n; scov_xx=scov[1:p_x,1:p_x]; scov_xy=scov[1:p_x,(p_x+1):p]; scov_yx=scov_xy`; scov_yy=scov[(p_x+1):p,(p_x+1):p]; c_x=c_xt`; c_y=c_yt`; s_xx=c_xt*scov_xx*c_x; s_xy=c_xt*scov_xy*c_y; s_yy=c_yt*scov_yy*c_y; *----------------; *y1=a x+e_1; hat_a=s_xy[1]/s_xx; hsig2_a=n*(s_yy[1,1]-s_xy[1]*s_xy[1]/s_xx)/(n-2); Var_a=hsig2_a/(n*s_xx); SE_a=sqrt(Var_a); z_a=hat_a/SE_a; Rsq_1=1-(s_yy[1,1]-s_xy[1]*s_xy[1]/s_xx)/s_yy[1,1]; *----------------; *y2=c x+b y1+e_2; cc_mat=(s_xx||s_xy[1])//(s_xy[1]||s_yy[1,1]); c_vec=s_xy[2]//s_yy[1,2]; hat_cb=inv(cc_mat)*c_vec; c_vect=c_vec`; hsig2_2=n*(s_yy[2,2]-c_vect*inv(cc_mat)*c_vec)/(n-3); Cov_cb=hsig2_2*inv(n*cc_mat); SE_cb=sqrt(vecdiag(Cov_cb)); z_cb=hat_cb/SE_cb; Rsq_2=1-(s_yy[2,2]-c_vect*inv(cc_mat)*c_vec)/s_yy[2,2]; *----------------; reg_est=j(3,3,0); reg_est[1,]=hat_a||SE_a||z_a; reg_est[2:3,]=(hat_cb||SE_cb||z_cb); Rsq=Rsq_1||Rsq_2; finish; *--------------------------------------------; *Transforming weight of model A to B using structured Sigma=h*w*w'+Psi; *--------------------------------------------; start ssw(w,S,w_ss); p=nrow(S); w_t=w`; c2=w_t*S*w; d_s=vecdiag(S); t1=c2-sum(w#w#d_s); t2=ssq(w)*ssq(w)-sum(w#w#w#w); h=t1/t2; psi=d_s-h*w#w; *print "p=" p; do j=1 to p; psi_j=psi[j]; if psi_j<0 then do; print "Heywood case, j=" j psi_j; psi[j]=.05; end; end; w_s=w/psi; w_ss=w_s/sqrt(w_s`*S*w_s); hlamb=sqrt(h)*w; hlamb2=hlamb#hlamb; *maximal reliability coefficient; a_1=sum(hlamb2); a_12=a_1*a_1; a_2=sum(hlamb2#psi); rho_A=a_12/(a_12+a_2); b_1=sum(hlamb2/psi); rho_B=b_1/(1+b_1); *print "w psi w_s, D_s, lamb, lamb2=" w psi w_s d_s hlamb hlamb2; *print "PLS: p_j, rho_A, rho_B=" p rho_A rho_B; finish; *--------------------------------------------; *Subroutine conducting PLS-SEM mode A, and B_A; *--------------------------------------------; start PLS_A(n,scov0, reg_plsA,reg_plsBA, Rsq_A, Rsq_BA); scov=(n-1)*scov0/n; p_x=2; p_y1=2; p_y2=2; s_xx=scov[1:2,1:2];s_xy1=scov[1:2,3:4];s_xy2=scov[1:2,5:6]; s_y1x=s_xy1`; s_y11=scov[3:4,3:4];s_y12=scov[3:4,5:6]; s_y2x=s_xy2`; s_y21=s_y12`; s_y22=scov[5:6,5:6]; *--------------------; *starting value of weights; w_x=j(p_x,1,1); c2_x=w_x`*s_xx*w_x; w_x=w_x/sqrt(c2_x); w_y1=j(p_y1,1,1); c2_y1=w_y1`*s_y11*w_y1; w_y1=w_y1/sqrt(c2_y1); w_y2=j(p_y2,1,1); c2_y2=w_y2`*s_y22*w_y2; w_y2=w_y2/sqrt(c2_y2); *initial composites; *eta_x=x*w_x; *eta_y1=y1*w_y1; *eta_y2=y2*w_y2; *environment variables; r_xy1=w_x`*s_xy1*w_y1; r_xy2=w_x`*s_xy2*w_y2; r_y12=w_y1`*s_y12*w_y2; *ev_x=sign(r_xy1)*eta_y1+sign(r_xy2)*eta_y2; *ev_y1=sign(r_xy1)*eta_x+sign(r_y12)*eta_y2; *ev_y2=sign(r_xy2)*eta_x+sign(r_y12)*eta_y1; *ev_x=ev_x/sqrt(ssq(ev_x)/n); *ev_y1=ev_y1/sqrt(ssq(ev_y1)/n); *ev_y2=ev_y2/sqrt(ssq(ev_y2)/n); *w_x0=w_x; *w_y10=w_y1; *w_y20=w_y2; w_0=w_x//w_y1//w_y2; *print "-------------mode A-------------"; dw=1; ep=.00001; do j=1 to 50 while (max(abs(dw))>ep); *w_x=x_t*ev_y1/n, ev_x=sign(r_xy1)*eta_y1+sign(r_xy2)*eta_y2; w_x=sign(r_xy1)*s_xy1*w_y1+sign(r_xy2)*s_xy2*w_y2; *w_x is the vector of regression coefficients ev_x-->x, var(ev_x)=1; c2_x=w_x`*s_xx*w_x; w_x=sign(w_x[1])*w_x/sqrt(c2_x); *eta_x=x*w_x; *eta_x is the vector of predicted score; *w_y1=y1_t*ev_y1/n, ev_y1=sign(r_xy1)*eta_x+sign(r_y12)*eta_y2; w_y1=sign(r_xy1)*(s_y1x)*w_x+sign(r_y12)*s_y12*w_y2; c2_y1=w_y1`*s_y11*w_y1; w_y1=sign(w_y1[1])*w_y1/sqrt(c2_y1); *w_y2=y2_t*ev_y2/n, *ev_y2=sign(r_xy2)*eta_x+sign(r_y12)*eta_y1; w_y2=sign(r_xy2)*s_y2x*w_x+sign(r_y12)*s_y21*w_y1; c2_y2=w_y2`*s_y22*w_y2; w_y2=sign(w_y2[1])*w_y2/sqrt(c2_y2); *Note that for the corrected model, the estimates do not depend the sign infront of eta_y or ev_y, how about misspecified models?; *environment variables; r_xy1=w_x`*s_xy1*w_y1; r_xy2=w_x`*s_xy2*w_y2; r_y12=w_y1`*s_y12*w_y2; w_j=w_x//w_y1//w_y2; dw=w_j-w_0; w_0=w_j; end; print "mode A iteration time=" j; w_a=w_j; w_xt=w_x`; w_y=(w_y1||j(2,1,0))//(j(2,1,0)||w_y2); w_yt=w_y`; scov=scov0; run regress(n,w_xt,w_yt,scov,reg_plsA, Rsq_A); *print "--------mode A structured to mode B--------"; *print "--------block-x---------"; run ssw(w_x,S_xx,w_sx); *print "--------block-y1---------"; run ssw(w_y1,S_y11,w_sy1); *print "--------block-y2---------"; run ssw(w_y2,S_y22,w_sy2); w_sb=w_sx//w_sy1//w_sy2; *print "---------------------------------weights model 1---------------------------------"; *print "mode A(w_x,w_y1,w_y2)--mode BA(w_x,w_y1,w_y2)"; *print w_x w_y1 w_y2 w_sx w_sy1 w_sy2; w_sxt=w_sx`; w_sy=(w_sy1||j(2,1,0))//(j(2,1,0)||w_sy2); w_syt=w_sy`; scov=scov0; run regress(n,w_sxt,w_syt,scov,reg_plsBA, Rsq_BA); finish; ***--------------------------------------------------------------------***; *** The following is the main program; ***--------------------------------------------------------------------***; start main; print "--------------------------------------------------------------------"; print "--------------------------------------------------------------------"; print "Wheaton et al (1977) Sociological Methodology"; print "---------------------------------------------------------"; n=932; p=6; ps=p*(p+1)/2; print "N, p=" N p; p_x=2; p_y=4; scov=j(6,6,0); *The sample covariance matrix is from page 21 of EQS6 manual (Bentler, 2006); scov_vec={ 11.834 6.947 9.364 6.819 5.091 12.532 4.783 5.028 7.495 9.986 -3.839 -3.889 -3.841 -3.625 9.610 -2.1899 -1.8831 -2.1748 -1.8775 3.5522 4.50288}; c=0; do i=1 to 6; do j=1 to i; c=c+1; scov[i,j]=scov_vec[c]; scov[j,i]=scov_vec[c]; end; end; *print "scov=" scov; scov_a=scov[{5,6,1,2,3,4},{5,6,1,2,3,4}]; *rearranging the order of the variables; *print "scov_a=" scov_a; a0={ 2.582 1.376 .889 .849 -1.585 -.450 .705 5.307 3.741 2.944 2.610 4.015 3.192 3.702 3.624 }; a0=a0`; *starting values for ML estimation; q=nrow(a0); df0=ps-q; print "df0=" df0; run Dp(p,dup); theta0=a0; run ML_SEM(n, p, dup, theta0,scov_a, T_ml,htheta_z,coef_xt,coef_yt,n_div); if n_div=1 then do; print "Fisher-scoring algorithm did not converge within 300 iterations"; end; else do; p_v=1-probchi(T_ml,df0); print "(CB-SEM) T_ml, df, p_value=" T_ml df0 p_v; print "(CB-SEM) htheta--SE--Z"; print htheta_z[format=f8.3]; hgamma_z=htheta_z[5:7,]; *------------------R-square under CB-SEM------------------; gamma_11=theta0[5]; gamma_21=theta0[6]; beta_21=theta0[7]; *eta1=gamma_11*xi_1+zeta_1; Var_eta1=gamma_11*gamma_11+theta0[8]; Rsq_cb1=1-theta0[8]/Var_eta1; *eta2=gamma_21*xi_1+beta_21*eta_1+zeta_2 =gamma_21*xi_1+beta_21(gamma_11*xi_1+zeta_1)+zeta_2 =gamma_21*xi_1+beta_21*gamma_11*xi_1+beta_21*zeta_1+zeta_2 =(gamma_21+beta_21*gamma_11)*xi_1+beta_21*zeta_1+zeta_2; beta_2=gamma_21+beta_21*gamma_11; Var_eta2=beta_2*beta_2+beta_21*beta_21*theta0[8]+theta0[9]; Rsq_cb2=1-theta0[9]/Var_eta2; Rsq_cb=Rsq_cb1||Rsq_cb2; *---------------------------------------------------------; run regress(n,coef_xt,coef_yt,scov_a,reg_fsr, Rsq_fsr); *----------EWC reg-------------; lamb_x=theta0[1:2]; lamb_y1=1.0//theta0[3]; lamb_y2=1.0//theta0[4]; c_x=j(2,1,1)/(j(1,2,1)*lamb_x); c_y1=j(2,1,1)/(j(1,2,1)*lamb_y1); c_y2=j(2,1,1)/(j(1,2,1)*lamb_y2); c_xt=c_x`; c_y=(c_y1||j(2,1,0))//(j(2,1,0)||c_y2); c_yt=c_y`; run regress(n,c_xt,c_yt,scov_a,reg_ewc, Rsq_ewc); print "[gamma_11//gamma_21//b_21](hat\gamma, SE, z)"; print "---CB-SEM--- ---FSR--- ---EWC---"; print hgamma_z[format=f7.3] reg_fsr[format=f7.3] reg_ewc[format=f7.3]; *-------------PLS-SEM---------------; run PLS_A(n,scov_a, reg_plsA,reg_plsBA, Rsq_A, Rsq_BA); print "---plsA------plsB_A---"; print reg_plsA[format=f7.3] reg_plsBA[format=f7.3]; *Rsq_all=(Rsq_cb//Rsq_fsr//Rsq_ewc//Rsq_A//Rsq_BA)`; *print "R-square(CB-SEM,FSR,EWCR,PLSA,PLSAB)"; *print Rsq_all[format=f6.3]; end; finish; run main;