Simulate data from a general SEM model including non-linear effects and general link and distribution of variables.
## S3 method for class 'lvm'sim(x, n =NULL, p =NULL, normal =FALSE, cond =FALSE,sigma =1, rho =0.5, X =NULL, unlink=FALSE, latent=TRUE,use.labels =TRUE, seed=NULL,...)
Arguments
x: Model object
...: Additional arguments to be passed to the low level functions
n: Number of simulated values/individuals
p: Parameter value (optional)
normal: Logical indicating whether to simulate data from a multivariate normal distribution conditional on exogenous variables hence ignoring functional/distribution definition
cond: for internal use
sigma: Default residual variance (1)
rho: Default covariance parameter (0.5)
X: Optional matrix of fixed values of variables (manipulation)
unlink: Return Inverse link transformed data
latent: Include latent variables (default TRUE)
use.labels: convert categorical variables to factors before applying transformation
seed: Random seed
Examples
#################################################### Logistic regression##################################################m <- lvm(y~x+z)regression(m)<- x~z
distribution(m,~y+z)<- binomial.lvm("logit")d <- sim(m,1e3)head(d)e <- estimate(m,d,estimator="glm")e
## Simulate a few observation from estimated modelsim(e,n=5)#################################################### Poisson##################################################distribution(m,~y)<- poisson.lvm()d <- sim(m,1e4,p=c(y=-1,"y~x"=2,z=1))head(d)estimate(m,d,estimator="glm")mean(d$z); lava:::expit(1)summary(lm(y~x,sim(lvm(y[1:2]~4*x),1e3)))##################################################### Gamma distribution##################################################m <- lvm(y~x)distribution(m,~y+x)<- list(Gamma.lvm(shape=2),binomial.lvm())intercept(m,~y)<-0.5d <- sim(m,1e4)summary(g <- glm(y~x,family=Gamma(),data=d))## Not run: MASS::gamma.shape(g)args(lava::Gamma.lvm)distribution(m,~y)<- Gamma.lvm(shape=2,log=TRUE)sim(m,10,p=c(y=0.5))[,"y"]##################################################### Beta##################################################m <- lvm()distribution(m,~y)<- beta.lvm(alpha=2,beta=1)var(sim(m,100,"y,y"=2))distribution(m,~y)<- beta.lvm(alpha=2,beta=1,scale=FALSE)var(sim(m,100))##################################################### Transform##################################################m <- lvm()transform(m,xz~x+z)<-function(x) x[1]*(x[2]>0)regression(m)<- y~x+z+xz
d <- sim(m,1e3)summary(lm(y~x+z + x*I(z>0),d))##################################################### Non-random variables##################################################m <- lvm()distribution(m,~x+z+v+w)<- list(Sequence.lvm(0,5),## Seq. 0 to 5 by 1/n Binary.lvm(),## Vector of ones Binary.lvm(0.5),## 0.5n 0, 0.5n 1 Binary.lvm(interval=list(c(0.3,0.5),c(0.8,1))))sim(m,10)##################################################### Cox model### piecewise constant hazard################################################m <- lvm(t~x)rates <- c(1,0.5); cuts <- c(0,5)## Constant rate: 1 in [0,5), 0.5 in [5,Inf)distribution(m,~t)<- coxExponential.lvm(rate=rates,timecut=cuts)## Not run: d <- sim(m,2e4,p=c("t~x"=0.1)); d$status <-TRUE plot(timereg::aalen(survival::Surv(t,status)~x,data=d, resample.iid=0,robust=0),spec=1) L <- approxfun(c(cuts,max(d$t)),f=1, cumsum(c(0,rates*diff(c(cuts,max(d$t))))), method="linear") curve(L,0,100,add=TRUE,col="blue")## End(Not run)##################################################### Cox model### piecewise constant hazard, gamma frailty##################################################m <- lvm(y~x+z)rates <- c(0.3,0.5); cuts <- c(0,5)distribution(m,~y+z)<- list(coxExponential.lvm(rate=rates,timecut=cuts), loggamma.lvm(rate=1,shape=1))## Not run: d <- sim(m,2e4,p=c("y~x"=0,"y~z"=0)); d$status <-TRUE plot(timereg::aalen(survival::Surv(y,status)~x,data=d, resample.iid=0,robust=0),spec=1) L <- approxfun(c(cuts,max(d$y)),f=1, cumsum(c(0,rates*diff(c(cuts,max(d$y))))), method="linear") curve(L,0,100,add=TRUE,col="blue")## End(Not run)## Equivalent via transform (here with Aalens additive hazard model)m <- lvm(y~x)distribution(m,~y)<- aalenExponential.lvm(rate=rates,timecut=cuts)distribution(m,~z)<- Gamma.lvm(rate=1,shape=1)transform(m,t~y+z)<- prod
sim(m,10)## Shared frailtym <- lvm(c(t1,t2)~x+z)rates <- c(1,0.5); cuts <- c(0,5)distribution(m,~y)<- aalenExponential.lvm(rate=rates,timecut=cuts)distribution(m,~z)<- loggamma.lvm(rate=1,shape=1)## Not run:mets::fast.reshape(sim(m,100),varying="t")## End(Not run)##################################################### General multivariate distributions#################################################### Not run:m <- lvm()distribution(m,~y1+y2,oratio=4)<- VGAM::rbiplackcop
ksmooth2(sim(m,1e4),rgl=FALSE,theta=-20,phi=25)m <- lvm()distribution(m,~z1+z2,"or1")<- VGAM::rbiplackcop
distribution(m,~y1+y2,"or2")<- VGAM::rbiplackcop
sim(m,10,p=c(or1=0.1,or2=4))## End(Not run)m <- lvm()distribution(m,~y1+y2+y3,TRUE)<-function(n,...) rmvn0(n,sigma=diag(3)+1)var(sim(m,100))## Syntax also useful for univariate generators, e.g.m <- lvm(y~x+z)distribution(m,~y,TRUE)<-function(n) rnorm(n,mean=1000)sim(m,5)distribution(m,~y,"m1",0)<- rnorm
sim(m,5)sim(m,5,p=c(m1=100))##################################################### Regression design in other parameters#################################################### Variance heterogeneitym <- lvm(y~x)distribution(m,~y)<-function(n,mean,x) rnorm(n,mean,exp(x)^.5)if(interactive()) plot(y~x,sim(m,1e3))## Alternaively, calculate the standard error directlyaddvar(m)<-~sd ## If 'sd' should be part of the resulting data.frameconstrain(m,sd~x)<-function(x) exp(x)^.5distribution(m,~y)<-function(n,mean,sd) rnorm(n,mean,sd)if(interactive()) plot(y~x,sim(m,1e3))## Regression on variance parameterm <- lvm()regression(m)<- y~x
regression(m)<- v~x
##distribution(m,~v) <- 0 # No stochastic term## Alternative:## regression(m) <- v[NA:0]~xdistribution(m,~y)<-function(n,mean,v) rnorm(n,mean,exp(v)^.5)if(interactive()) plot(y~x,sim(m,1e3))## Regression on shape parameter in Weibull modelm <- lvm()regression(m)<- y ~ z+v
regression(m)<- s ~ exp(0.6*x-0.5*z)distribution(m,~x+z)<- binomial.lvm()distribution(m,~cens)<- coxWeibull.lvm(scale=1)distribution(m,~y)<- coxWeibull.lvm(scale=0.1,shape=~s)eventTime(m)<- time ~ min(y=1,cens=0)if(interactive()){ d <- sim(m,1e3) require(survival)(cc <- coxph(Surv(time,status)~v+strata(x,z),data=d)) plot(survfit(cc),col=1:4,mark.time=FALSE)}##################################################### Categorical predictor##################################################m <- lvm()## categorical(m,K=3) <- "v"categorical(m,labels=c("A","B","C"))<-"v"regression(m,additive=FALSE)<- y~v
## Not run:plot(y~v,sim(m,1000,p=c("y~v:2"=3)))## End(Not run)m <- lvm()categorical(m,labels=c("A","B","C"),p=c(0.5,0.3))<-"v"regression(m,additive=FALSE,beta=c(0,2,-1))<- y~v
## equivalent to:## regression(m,y~v,additive=FALSE) <- c(0,2,-1)regression(m,additive=FALSE,beta=c(0,4,-1))<- z~v
table(sim(m,1e4)$v)glm(y~v, data=sim(m,1e4))glm(y~v, data=sim(m,1e4,p=c("y~v:1"=3)))transform(m,v2~v)<-function(x) x=='A'sim(m,10)##################################################### Pre-calculate object##################################################m <- lvm(y~x)m2 <- sim(m,'y~x'=2)sim(m,10,'y~x'=2)sim(m2,10)## Faster