#
# Each pair gives the number of controls requested per risk set for the
# first and second panel; the function uses min(requested, available).
n.ctls.from.each.set <- c(1, 300)
sample.and.plot.results(n.ctls.from.each.set)

n.ctls.from.each.set <- c(100, 300)
sample.and.plot.results(n.ctls.from.each.set)

n.ctls.from.each.set <- c(10, 300)
sample.and.plot.results(n.ctls.from.each.set)
# Woburn Study Table 2 -- leukemia cases and peers#
#
# One entry per leukemia case; each case defines one risk set.
dx.year <- 1900 + c(66, 69, 69, 72, 72, 73, 74, 75, 75.5, 76,
                    76, 78, 79, 80, 81, 82, 83)
birth.year.case <- 1900 + c(59, 57, 64, 65, 68, 70, 65, 64, 75, 63,
                            72, 63, 69, 66, 68, 79, 74)
age.dx.case <- dx.year - birth.year.case

# Size of each risk set (case included) and the proportion exposed in it.
n.in.riskset <- c(218, 290, 265, 182, 183, 170, 213, 239, 115, 219,
                  132, 219, 164, 199, 187, 154, 84)
n.risksets <- length(n.in.riskset)
n.risksets

prop.exposed <- c(.33, .26, .25, .36, .32, .19, .29, .38, .25, .40,
                  .18, .40, .31, .39, .35, .23, .23)
n.exposed <- round(n.in.riskset * prop.exposed)
n.not.exposed <- n.in.riskset - n.exposed

# A case counts as exposed when its exposure value is positive.
case.exposure <- c(1.26, 0, .75, 4.3, 2.76, .94, 0, 0, 0, .37,
                   0, 7.88, 2.41, 0, 0, .39, 0)
case.exposed <- (case.exposure > 0)
case.not.exposed <- 1 - case.exposed

# Controls are the members of each risk set other than the case itself.
controls.exposed <- n.exposed - case.exposed
controls.not.exposed <- n.not.exposed - case.not.exposed
controls <- n.in.riskset - 1
#
require(survival)
#
# Sample controls from every risk set, estimate the log hazard ratio by
# Cox regression and by the Mantel-Haenszel ratio, and draw one panel per
# sampling scheme.
#
# Arguments:
#   sizes  numeric vector of length >= 2. sizes[1] and sizes[2] are the
#          numbers of controls requested per risk set for the first and
#          second panel; each is capped at the number available.
#
# Reads from the global workspace: controls.exposed, controls.not.exposed,
# case.exposed, case.not.exposed, age.dx.case, dx.year, birth.year.case.
# Needs coxph(), Surv() and strata() from the survival package.
#
# Returns NULL invisibly; called for the plot it draws.
sample.and.plot.results <- function(sizes) {
  stopifnot(is.numeric(sizes), length(sizes) >= 2)

  # Take the number of risk sets from the data instead of a literal 17.
  n.sets <- length(controls.exposed)
  set.ids <- seq_len(n.sets)

  # Put the caller's graphics settings back when the function exits.
  old.par <- par(mfrow = c(1, 2), plt = c(0.1, 0.99, .1, .99))
  on.exit(par(old.par), add = TRUE)

  for (sim in 1:2) {
    n.exposed.ctls <- numeric(n.sets)
    n.unexposed.ctls <- numeric(n.sets)
    actual.n.from.each <- numeric(n.sets)

    for (i in set.ids) {
      all.ctls <- c(rep(1, controls.exposed[i]),
                    rep(0, controls.not.exposed[i]))
      n.sampled <- min(sizes[sim], length(all.ctls))
      # Index through sample.int() so that a pool of length one is not
      # misread by sample() as the range 1:x.
      picked <- all.ctls[sample.int(length(all.ctls), n.sampled)]
      n.exposed.ctls[i] <- sum(picked)
      n.unexposed.ctls[i] <- n.sampled - n.exposed.ctls[i]
      actual.n.from.each[i] <- n.sampled
    }
    # Proportion of the sampled controls that are exposed, per risk set.
    pe <- n.exposed.ctls / actual.n.from.each

    # Mantel-Haenszel ratio over the matched sets (case + sampled controls).
    set.size <- actual.n.from.each + 1
    mh.num <- sum(case.exposed * n.unexposed.ctls / set.size)
    mh.den <- sum(case.not.exposed * n.exposed.ctls / set.size)
    logHR.MH <- log(mh.num / mh.den)

    # Data for coxph: per risk set one case row, one row for the exposed
    # controls and one for the unexposed controls, weighted by `freq`.
    ds <- data.frame(
      age = rep(age.dx.case, times = 3),
      case = rep(c(1, 0, 0), each = n.sets),
      exposed = c(as.numeric(case.exposed), rep(1, n.sets), rep(0, n.sets)),
      freq = c(rep(1, n.sets), n.exposed.ctls, n.unexposed.ctls),
      riskset.no = rep(set.ids, times = 3)
    )
    # Rows with zero frequency are removed before fitting.
    ds <- ds[ds$freq > 0, ]
    fit <- coxph(Surv(age, case) ~ exposed + strata(riskset.no),
                 weights = freq, data = ds)
    alpha.hat <- fit$coefficients
    me <- 1.96 * sqrt(drop(fit$var))
    alpha.lo <- alpha.hat - me
    alpha.hi <- alpha.hat + me

    plot(c(1964.5, 1986), c(0, 20), type = "n",
         xlab = "Year", ylab = "Age", ylim = c(0, 16))
    bar.w <- 2
    bar.h <- 0.4
    for (i in set.ids) {
      x0 <- dx.year[i]
      age.i <- dx.year[i] - birth.year.case[i]
      # Dotted line from birth (age 0) to diagnosis.
      segments(birth.year.case[i], 0, x0, age.i, lty = 3, col = "grey")
      # Upper bar: the case, red if exposed and green otherwise.
      rect(x0, age.i, x0 + bar.w, bar.h + age.i,
           col = c("green", "red")[case.exposed[i] + 1], border = NA)
      # Lower bar: sampled controls, red share = proportion exposed.
      ctl.top <- -0.05 + age.i
      ctl.bot <- -0.05 - bar.h + age.i
      x.split <- x0 + bar.w * pe[i]
      if (pe[i] > 0) {
        rect(x0, ctl.top, x.split, ctl.bot, col = "red", border = NA)
      }
      if (pe[i] < 1) {
        rect(x.split, ctl.top, x0 + bar.w, ctl.bot, col = "green", border = NA)
      }
      # Counts beside the bars: sampled controls below, the single case above.
      text(x0 + 1.65 * bar.w, ctl.bot, toString(actual.n.from.each[i]),
           cex = 0.8, adj = c(1, 0))
      text(x0 + 1.65 * bar.w, age.i, "1", cex = 0.8, adj = c(1, 0))
    }

    # Inset scale for the log hazard ratio: one unit = `unit` on the y axis.
    y.0 <- 12
    unit <- 0.8
    text(1966 - 0.2, y.0 + 4.4 * unit, "log HR", adj = c(1, 0), cex = 0.7)
    text(1967, y.0 + 4.4 * unit, "Cox", adj = c(0.5, 0), cex = 0.6)
    text(1968, y.0 + 4.4 * unit, "MH", adj = c(0.5, 0), cex = 0.6)
    for (alpha in 0:4) {
      segments(1966, y.0 + alpha * unit, 1966.5, y.0 + alpha * unit)
      text(1966 - 0.2, y.0 + alpha * unit, toString(alpha),
           adj = c(1, 0.5), cex = 0.6)
    }
    points(1967, y.0 + alpha.hat * unit, pch = 20)
    segments(1967, y.0 + alpha.lo * unit, 1967, y.0 + alpha.hi * unit)
    points(1968, y.0 + logHR.MH * unit, pch = 20)
  }
  invisible(NULL)
}  # end of function definition
# Each pair gives the number of controls requested per risk set for the
# first and second panel; the function uses min(requested, available).
n.ctls.from.each.set <- c(1, 300)
sample.and.plot.results(n.ctls.from.each.set)

n.ctls.from.each.set <- c(4, 300)
sample.and.plot.results(n.ctls.from.each.set)

n.ctls.from.each.set <- c(10, 300)
sample.and.plot.results(n.ctls.from.each.set)
# Woburn Study Table 2 -- leukemia cases and peers#
#
# One entry per leukemia case; each case defines one risk set.
dx.year <- 1900 + c(66, 69, 69, 72, 72, 73, 74, 75, 75.5, 76,
                    76, 78, 79, 80, 81, 82, 83)
birth.year.case <- 1900 + c(59, 57, 64, 65, 68, 70, 65, 64, 75, 63,
                            72, 63, 69, 66, 68, 79, 74)
age.dx.case <- dx.year - birth.year.case

# Size of each risk set (case included) and the proportion exposed in it.
n.in.riskset <- c(218, 290, 265, 182, 183, 170, 213, 239, 115, 219,
                  132, 219, 164, 199, 187, 154, 84)
n.risksets <- length(n.in.riskset)
n.risksets

prop.exposed <- c(.33, .26, .25, .36, .32, .19, .29, .38, .25, .40,
                  .18, .40, .31, .39, .35, .23, .23)
n.exposed <- round(n.in.riskset * prop.exposed)
n.not.exposed <- n.in.riskset - n.exposed

# A case counts as exposed when its exposure value is positive.
case.exposure <- c(1.26, 0, .75, 4.3, 2.76, .94, 0, 0, 0, .37,
                   0, 7.88, 2.41, 0, 0, .39, 0)
case.exposed <- (case.exposure > 0)
case.not.exposed <- 1 - case.exposed

# Controls are the members of each risk set other than the case itself.
controls.exposed <- n.exposed - case.exposed
controls.not.exposed <- n.not.exposed - case.not.exposed
controls <- n.in.riskset - 1
#
require(survival)
#
# Sample controls from every risk set, estimate the log hazard ratio by
# Cox regression and by the Mantel-Haenszel ratio, and draw one panel per
# sampling scheme.
#
# Arguments:
#   sizes  numeric vector of length >= 2. sizes[1] and sizes[2] are the
#          numbers of controls requested per risk set for the first and
#          second panel; each is capped at the number available.
#
# Reads from the global workspace: controls.exposed, controls.not.exposed,
# case.exposed, case.not.exposed, age.dx.case, dx.year, birth.year.case.
# Needs coxph(), Surv() and strata() from the survival package.
#
# Returns NULL invisibly; called for the plot it draws.
sample.and.plot.results <- function(sizes) {
  stopifnot(is.numeric(sizes), length(sizes) >= 2)

  # Take the number of risk sets from the data instead of a literal 17.
  n.sets <- length(controls.exposed)
  set.ids <- seq_len(n.sets)

  # Put the caller's graphics settings back when the function exits.
  old.par <- par(mfrow = c(1, 2), plt = c(0.1, 0.99, .1, .99))
  on.exit(par(old.par), add = TRUE)

  for (sim in 1:2) {
    n.exposed.ctls <- numeric(n.sets)
    n.unexposed.ctls <- numeric(n.sets)
    actual.n.from.each <- numeric(n.sets)

    for (i in set.ids) {
      all.ctls <- c(rep(1, controls.exposed[i]),
                    rep(0, controls.not.exposed[i]))
      n.sampled <- min(sizes[sim], length(all.ctls))
      # Index through sample.int() so that a pool of length one is not
      # misread by sample() as the range 1:x.
      picked <- all.ctls[sample.int(length(all.ctls), n.sampled)]
      n.exposed.ctls[i] <- sum(picked)
      n.unexposed.ctls[i] <- n.sampled - n.exposed.ctls[i]
      actual.n.from.each[i] <- n.sampled
    }
    # Proportion of the sampled controls that are exposed, per risk set.
    pe <- n.exposed.ctls / actual.n.from.each

    # Mantel-Haenszel ratio over the matched sets (case + sampled controls).
    set.size <- actual.n.from.each + 1
    mh.num <- sum(case.exposed * n.unexposed.ctls / set.size)
    mh.den <- sum(case.not.exposed * n.exposed.ctls / set.size)
    logHR.MH <- log(mh.num / mh.den)

    # Data for coxph: per risk set one case row, one row for the exposed
    # controls and one for the unexposed controls, weighted by `freq`.
    ds <- data.frame(
      age = rep(age.dx.case, times = 3),
      case = rep(c(1, 0, 0), each = n.sets),
      exposed = c(as.numeric(case.exposed), rep(1, n.sets), rep(0, n.sets)),
      freq = c(rep(1, n.sets), n.exposed.ctls, n.unexposed.ctls),
      riskset.no = rep(set.ids, times = 3)
    )
    # Rows with zero frequency are removed before fitting.
    ds <- ds[ds$freq > 0, ]
    fit <- coxph(Surv(age, case) ~ exposed + strata(riskset.no),
                 weights = freq, data = ds)
    alpha.hat <- fit$coefficients
    me <- 1.96 * sqrt(drop(fit$var))
    alpha.lo <- alpha.hat - me
    alpha.hi <- alpha.hat + me

    plot(c(1964.5, 1986), c(0, 20), type = "n",
         xlab = "Year", ylab = "Age", ylim = c(0, 16))
    bar.w <- 2
    bar.h <- 0.4
    for (i in set.ids) {
      x0 <- dx.year[i]
      age.i <- dx.year[i] - birth.year.case[i]
      # Dotted line from birth (age 0) to diagnosis.
      segments(birth.year.case[i], 0, x0, age.i, lty = 3, col = "grey")
      # Upper bar: the case, red if exposed and green otherwise.
      rect(x0, age.i, x0 + bar.w, bar.h + age.i,
           col = c("green", "red")[case.exposed[i] + 1], border = NA)
      # Lower bar: sampled controls, red share = proportion exposed.
      ctl.top <- -0.05 + age.i
      ctl.bot <- -0.05 - bar.h + age.i
      x.split <- x0 + bar.w * pe[i]
      if (pe[i] > 0) {
        rect(x0, ctl.top, x.split, ctl.bot, col = "red", border = NA)
      }
      if (pe[i] < 1) {
        rect(x.split, ctl.top, x0 + bar.w, ctl.bot, col = "green", border = NA)
      }
      # Counts beside the bars: sampled controls below, the single case above.
      text(x0 + 1.65 * bar.w, ctl.bot, toString(actual.n.from.each[i]),
           cex = 0.8, adj = c(1, 0))
      text(x0 + 1.65 * bar.w, age.i, "1", cex = 0.8, adj = c(1, 0))
    }

    # Inset scale for the log hazard ratio: one unit = `unit` on the y axis.
    y.0 <- 12
    unit <- 0.8
    text(1966 - 0.2, y.0 + 4.4 * unit, "log HR", adj = c(1, 0), cex = 0.7)
    text(1967, y.0 + 4.4 * unit, "Cox", adj = c(0.5, 0), cex = 0.6)
    text(1968, y.0 + 4.4 * unit, "MH", adj = c(0.5, 0), cex = 0.6)
    for (alpha in 0:4) {
      segments(1966, y.0 + alpha * unit, 1966.5, y.0 + alpha * unit)
      text(1966 - 0.2, y.0 + alpha * unit, toString(alpha),
           adj = c(1, 0.5), cex = 0.6)
    }
    points(1967, y.0 + alpha.hat * unit, pch = 20)
    segments(1967, y.0 + alpha.lo * unit, 1967, y.0 + alpha.hi * unit)
    points(1968, y.0 + logHR.MH * unit, pch = 20)
  }
  invisible(NULL)
}  # end of function definition
# Each pair gives the number of controls requested per risk set for the
# first and second panel; the function uses min(requested, available).
n.ctls.from.each.set <- c(1, 300)
sample.and.plot.results(n.ctls.from.each.set)

n.ctls.from.each.set <- c(20, 300)
sample.and.plot.results(n.ctls.from.each.set)
#
# One entry per leukemia case; each case defines one risk set.
dx.year <- 1900 + c(66, 69, 69, 72, 72, 73, 74, 75, 75.5, 76,
                    76, 78, 79, 80, 81, 82, 83)
birth.year.case <- 1900 + c(59, 57, 64, 65, 68, 70, 65, 64, 75, 63,
                            72, 63, 69, 66, 68, 79, 74)
age.dx.case <- dx.year - birth.year.case

# Size of each risk set (case included) and the proportion exposed in it.
n.in.riskset <- c(218, 290, 265, 182, 183, 170, 213, 239, 115, 219,
                  132, 219, 164, 199, 187, 154, 84)
n.risksets <- length(n.in.riskset)
n.risksets

prop.exposed <- c(.33, .26, .25, .36, .32, .19, .29, .38, .25, .40,
                  .18, .40, .31, .39, .35, .23, .23)
n.exposed <- round(n.in.riskset * prop.exposed)
n.not.exposed <- n.in.riskset - n.exposed

# A case counts as exposed when its exposure value is positive.
case.exposure <- c(1.26, 0, .75, 4.3, 2.76, .94, 0, 0, 0, .37,
                   0, 7.88, 2.41, 0, 0, .39, 0)
case.exposed <- (case.exposure > 0)
case.not.exposed <- 1 - case.exposed

# Controls are the members of each risk set other than the case itself.
controls.exposed <- n.exposed - case.exposed
controls.not.exposed <- n.not.exposed - case.not.exposed
controls <- n.in.riskset - 1
#
require(survival)#
#
# start of function definition#
#
# Sample controls from every risk set, estimate the log hazard ratio by
# Cox regression and by the Mantel-Haenszel ratio, and draw one panel per
# sampling scheme.
#
# Arguments:
#   sizes  numeric vector of length >= 2. sizes[1] and sizes[2] are the
#          numbers of controls requested per risk set for the first and
#          second panel; each is capped at the number available.
#
# Reads from the global workspace: controls.exposed, controls.not.exposed,
# case.exposed, case.not.exposed, age.dx.case, dx.year, birth.year.case.
# Needs coxph(), Surv() and strata() from the survival package.
#
# Returns NULL invisibly; called for the plot it draws.
sample.and.plot.results <- function(sizes) {
  stopifnot(is.numeric(sizes), length(sizes) >= 2)

  # Take the number of risk sets from the data instead of a literal 17.
  n.sets <- length(controls.exposed)
  set.ids <- seq_len(n.sets)

  # Put the caller's graphics settings back when the function exits.
  old.par <- par(mfrow = c(1, 2), plt = c(0.1, 0.99, .1, .99))
  on.exit(par(old.par), add = TRUE)

  for (sim in 1:2) {
    n.exposed.ctls <- numeric(n.sets)
    n.unexposed.ctls <- numeric(n.sets)
    actual.n.from.each <- numeric(n.sets)

    for (i in set.ids) {
      all.ctls <- c(rep(1, controls.exposed[i]),
                    rep(0, controls.not.exposed[i]))
      n.sampled <- min(sizes[sim], length(all.ctls))
      # Index through sample.int() so that a pool of length one is not
      # misread by sample() as the range 1:x.
      picked <- all.ctls[sample.int(length(all.ctls), n.sampled)]
      n.exposed.ctls[i] <- sum(picked)
      n.unexposed.ctls[i] <- n.sampled - n.exposed.ctls[i]
      actual.n.from.each[i] <- n.sampled
    }
    # Proportion of the sampled controls that are exposed, per risk set.
    pe <- n.exposed.ctls / actual.n.from.each

    # Mantel-Haenszel ratio over the matched sets (case + sampled controls).
    set.size <- actual.n.from.each + 1
    mh.num <- sum(case.exposed * n.unexposed.ctls / set.size)
    mh.den <- sum(case.not.exposed * n.exposed.ctls / set.size)
    logHR.MH <- log(mh.num / mh.den)

    # Data for coxph: per risk set one case row, one row for the exposed
    # controls and one for the unexposed controls, weighted by `freq`.
    ds <- data.frame(
      age = rep(age.dx.case, times = 3),
      case = rep(c(1, 0, 0), each = n.sets),
      exposed = c(as.numeric(case.exposed), rep(1, n.sets), rep(0, n.sets)),
      freq = c(rep(1, n.sets), n.exposed.ctls, n.unexposed.ctls),
      riskset.no = rep(set.ids, times = 3)
    )
    # Rows with zero frequency are removed before fitting.
    ds <- ds[ds$freq > 0, ]
    fit <- coxph(Surv(age, case) ~ exposed + strata(riskset.no),
                 weights = freq, data = ds)
    alpha.hat <- fit$coefficients
    me <- 1.96 * sqrt(drop(fit$var))
    alpha.lo <- alpha.hat - me
    alpha.hi <- alpha.hat + me

    plot(c(1964.5, 1986), c(0, 20), type = "n",
         xlab = "Year", ylab = "Age", ylim = c(0, 16))
    bar.w <- 2
    bar.h <- 0.4
    for (i in set.ids) {
      x0 <- dx.year[i]
      age.i <- dx.year[i] - birth.year.case[i]
      # Dotted line from birth (age 0) to diagnosis.
      segments(birth.year.case[i], 0, x0, age.i, lty = 3, col = "grey")
      # Upper bar: the case, red if exposed and green otherwise.
      rect(x0, age.i, x0 + bar.w, bar.h + age.i,
           col = c("green", "red")[case.exposed[i] + 1], border = NA)
      # Lower bar: sampled controls, red share = proportion exposed.
      ctl.top <- -0.05 + age.i
      ctl.bot <- -0.05 - bar.h + age.i
      x.split <- x0 + bar.w * pe[i]
      if (pe[i] > 0) {
        rect(x0, ctl.top, x.split, ctl.bot, col = "red", border = NA)
      }
      if (pe[i] < 1) {
        rect(x.split, ctl.top, x0 + bar.w, ctl.bot, col = "green", border = NA)
      }
      # Counts beside the bars: sampled controls below, the single case above.
      text(x0 + 1.65 * bar.w, ctl.bot, toString(actual.n.from.each[i]),
           cex = 0.8, adj = c(1, 0))
      text(x0 + 1.65 * bar.w, age.i, "1", cex = 0.8, adj = c(1, 0))
    }

    # Inset scale for the log hazard ratio: one unit = `unit` on the y axis.
    y.0 <- 12
    unit <- 0.8
    text(1966 - 0.2, y.0 + 4.4 * unit, "log HR", adj = c(1, 0), cex = 0.7)
    text(1967, y.0 + 4.4 * unit, "Cox", adj = c(0.5, 0), cex = 0.6)
    text(1968, y.0 + 4.4 * unit, "MH", adj = c(0.5, 0), cex = 0.6)
    for (alpha in 0:4) {
      segments(1966, y.0 + alpha * unit, 1966.5, y.0 + alpha * unit)
      text(1966 - 0.2, y.0 + alpha * unit, toString(alpha),
           adj = c(1, 0.5), cex = 0.6)
    }
    points(1967, y.0 + alpha.hat * unit, pch = 20)
    segments(1967, y.0 + alpha.lo * unit, 1967, y.0 + alpha.hi * unit)
    points(1968, y.0 + logHR.MH * unit, pch = 20)
  }
  invisible(NULL)
}  # end of function definition
#
#
# Each pair gives the number of controls requested per risk set for the
# first and second panel; the function uses min(requested, available).
# The first assignment is overwritten before any call is made.
n.ctls.from.each.set <- c(10, 300)

n.ctls.from.each.set <- c(1, 300)
sample.and.plot.results(n.ctls.from.each.set)

n.ctls.from.each.set <- c(20, 300)
sample.and.plot.results(n.ctls.from.each.set)
# Open the documentation for mice().
help(mice)
# NOTE(review): `mic` looks like a mistyped `mice` — confirm before keeping.
help(mic)
help(mice)
# Example run on the nhanes data.
# NOTE(review): `imp$imputations` and lm.mids() depend on the installed
# mice version — confirm they exist in the version in use.
data(nhanes)
imp <- mice(nhanes)     # do default multiple imputation on a numeric matrix
imp
imp$imputations$bmi     # and list the actual imputations
complete(imp)       # show the first completed data matrix
lm.mids(chl~age+bmi+hyp, imp)   # repeated linear regression on imputed data

data(nhanes2)
# `im=` relies on partial argument matching; presumably it selects the
# per-column imputation method — TODO confirm against the mice() signature.
mice(nhanes2,im=c("sample","pmm","logreg","norm")) # imputation on mixed data with a different method per column
# 300 deaths and 66660 survivors. Exposure is recorded for every death
# (286 exposed, 14 not) and for 660 survivors (400 exposed, 260 not); it
# is missing (NA) for the remaining 66000 survivors.
# Both vectors must have the same length (66960) for glm() to accept them.
died <- c(rep(1, 300), rep(0, 66660))
exposed <- c(rep(1, 286), rep(0, 14),
             rep(1, 400), rep(0, 260),
             rep(NA, 66000))

# Logistic regression of death on exposure; under R's default na.action
# the rows with missing exposure are left out of the fit.
summary(glm(died ~ exposed, family = binomial))
# Fit the logistic model once and inspect it.
s <- glm(died ~ exposed, family = binomial)
summary(s)
exp(s$coefficients)

# Structure of the fit and of its summary. Neither object has a `var`
# component; the coefficient covariance matrix is summary(s)$cov.scaled.
str(s)
s$R
str(summary(s))
summary(s)$cov.scaled

# Scratch arithmetic kept from the session.
1/3.5
1/14
.2851^2

# Sum of reciprocal cell counts of the 2x2 table, to compare with the
# variance of the `exposed` coefficient in cov.scaled.
1/286 + 1/14 + 1/400 + 1/260
summary(s)$cov.scaled
# Attach mice before using it, then impute the missing exposures.
library(mice)
help(mice)

# data.frame() is the base constructor; make.frame() does not exist.
ds <- data.frame(died, exposed)
head(ds)
tail(ds)
mice(ds)
# Smaller cohort: same 960 complete records, 16000 survivors with
# exposure missing.
died <- c(rep(1, 300), rep(0, 16660))
exposed <- c(rep(1, 286), rep(0, 14),
             rep(1, 400), rep(0, 260),
             rep(NA, 16000))
ds <- data.frame(died, exposed)
head(ds)
tail(ds)

s <- glm(died ~ exposed, family = binomial)
summary(s)
exp(s$coefficients)

# Sum of reciprocal cell counts, to compare with cov.scaled below.
1/286 + 1/14 + 1/400 + 1/260
summary(s)$cov.scaled
# Impute the missing exposures and look at the result.
imp <- mice(ds)
str(imp)
head(imp)
lm.mids(died ~ exposed, imp)

# How many distinct values appear in a draw, without and with replacement?
sample(1:66153, 300)
unique(sample(1:66153, 300))
length
length(unique(sample(1:66153, 300)))
length(unique(sample(1:66153, 300, replace = TRUE)))
length(unique(sample(1:40006, 286, replace = TRUE)))
# Rebuild the 16960-row data set and the complete-case logistic fit.
died <- c(rep(1, 300), rep(0, 16660))
exposed <- c(rep(1, 286), rep(0, 14),
             rep(1, 400), rep(0, 260),
             rep(NA, 16000))
ds <- data.frame(died, exposed)
head(ds)
tail(ds)
s <- glm(died ~ exposed, family = binomial)
summary(s)
exp(s$coefficients)
1/286 + 1/14 + 1/400 + 1/260
summary(s)$cov.scaled

# Impute and inspect the pieces of the returned object.
library(mice)
imp <- mice(ds)
str(imp)
str(imp$data)
summary(imp$data)
imp$imp$exposed
head(imp$imp$exposed)
tail(imp$imp$exposed)
imp$data[1:10, ]

# Select the rows without missing values. A comparison with NA (`!= NA`)
# always gives NA, so the test has to use complete.cases() or is.na().
imp$data[complete.cases(imp$data), ]
imp$data[!is.na(imp$data[, 2]), 2]
help(is.na)
complete.cases(ds)
ds[, 1]
str(imp$imp)

# imp$imp is a list with one element per variable; imp$imp$exposed holds
# the imputed exposures, one column per imputation. The loops below use
# columns 1 to 5.
for (copy in 1:5) {
  y <- ds[, 1]
  # Observed exposures followed by the imputed ones. This lines up with y
  # only because ds lists every complete row before the incomplete rows.
  e <- c(ds[complete.cases(ds), 2], imp$imp$exposed[, copy])
  print(c(mean(y), mean(e)))
}
length(y)

# Same completed data, now fitting a Poisson regression of the outcome on
# exposure and printing the exposure coefficient for each imputation.
for (copy in 1:5) {
  y <- ds[, 1]
  e <- c(ds[complete.cases(ds), 2], imp$imp$exposed[, copy])
  fit <- glm(y ~ e, family = poisson)
  print(fit$coefficients[2])
}
help(mice)

# Two imputations: collect the exposure coefficient from each completed
# data set and look at the variance between them.
n.copies <- 2
imp <- mice(ds, m = n.copies)

# str(imp) # str(imp$data)

beta.hat <- c()
for (copy in seq_len(n.copies)) {
  y <- ds[, 1]
  e <- c(ds[complete.cases(ds), 2], imp$imp$exposed[, copy])
  fit <- glm(y ~ e, family = poisson)
  beta.hat <- c(beta.hat, fit$coefficients[2])
}
var(beta.hat)
var(beta.hat)
1/400 + 1/260
# Ten imputations: collect the exposure coefficient from each completed
# data set and look at the variance between them.
n.copies <- 10
imp <- mice(ds, m = n.copies)

# str(imp) # str(imp$data)

beta.hat <- c()
for (copy in seq_len(n.copies)) {
  y <- ds[, 1]
  e <- c(ds[complete.cases(ds), 2], imp$imp$exposed[, copy])
  fit <- glm(y ~ e, family = poisson)
  beta.hat <- c(beta.hat, fit$coefficients[2])
}
var(beta.hat)
1/400 + 1/260

# Last fit and scratch arithmetic.
summary(fit)
exp(-6)
beta.hat
40046/(40046+26107)
# Cohort of 62153: 300 deaths, exposure known for all of them and for
# 1000 survivors (600 exposed, 400 not), missing for everyone else.
died <- c(rep(1, 300), rep(0, 62153 - 300))
exposed <- c(rep(1, 286), rep(0, 14),
             rep(1, 600), rep(0, 400),
             rep(NA, 62153 - (300 + 1000)))
ds <- data.frame(died, exposed)
head(ds)
tail(ds)

s <- glm(died ~ exposed, family = binomial)
summary(s)
exp(s$coefficients)

# Sum of reciprocal cell counts, to compare with cov.scaled below.
1/286 + 1/14 + 1/600 + 1/400
summary(s)$cov.scaled
# Ten imputations of the 62153-row data set; collect the exposure
# coefficient from each completed data set.
n.copies <- 10
imp <- mice(ds, m = n.copies)
beta.hat <- c()
for (copy in seq_len(n.copies)) {
  y <- ds[, 1]
  e <- c(ds[complete.cases(ds), 2], imp$imp$exposed[, copy])
  fit <- glm(y ~ e, family = poisson)
  beta.hat <- c(beta.hat, fit$coefficients[2])
}
var(beta.hat)
1/600 + 1/400

str(imp)
summary(imp$imp$exposed)

# Ten normal draws with mean 0.605 and standard deviation
# sqrt(0.605 * 0.395 / 1000), and their sample standard deviation.
# sd() is the base function for this; stdev(), std() and s.dev() do not exist.
rnorm(10, 0.605, sqrt(0.605 * 0.395 / 1000))
help(var)
sd(rnorm(10, 0.605, sqrt(0.605 * 0.395 / 1000)))

var(beta.hat)
1/600 + 1/400
fit$coefficients[2]
library(boot)#
#
# Complete records only: 300 deaths (286 exposed) and 1000 survivors
# (600 exposed).
died <- c(rep(1, 300), rep(0, 1000))
exposed <- c(rep(1, 286), rep(0, 14),
             rep(1, 600), rep(0, 400))
dset <- data.frame(died, exposed)
head(dset)
tail(dset)
#
# Bootstrap statistic: log of the ratio
#   (exposed deaths / exposed survivors) /
#   (unexposed deaths / unexposed survivors)
# computed on the rows of `d` selected by `i`.
#
# Arguments:
#   d  data frame with 0/1 columns `died` and `exposed`
#   i  row indices of the resample (as supplied by boot())
#
# Uses only the data passed in `d`, so the result does not depend on any
# data set in the global workspace.
log.idr <- function(d, i) {
  died <- d$died[i]
  exposed <- d$exposed[i]
  ratio.exposed <- sum(died * exposed) / sum((1 - died) * exposed)
  ratio.unexposed <- sum(died * (1 - exposed)) / sum((1 - died) * (1 - exposed))
  log(ratio.exposed / ratio.unexposed)
}
# Statistic on the full data set.
log.idr(dset, 1:1300)

# Bootstrap variance of the statistic, resampling within outcome strata,
# printed next to the sum of reciprocal cell counts.
bs <- boot(dset, log.idr, R = 1000, strata = died)
var(bs$t)
1/286 + 1/14 + 1/600 + 1/400

bs <- boot(dset, log.idr, R = 1000, strata = dset$died)
var(bs$t)
1/286 + 1/14 + 1/600 + 1/400

bs <- boot(dset, log.idr, R = 10000, strata = dset$died)
var(bs$t)
1/286 + 1/14 + 1/600 + 1/400
# R function to create case-base dataset for use in fitting#
# parametric hazard functions via logistic regression#
# see Hanley and Miettinen, Int J Biostatistics 2009#
#
# Build a case-base dataset: the case series (one row per event) stacked
# on a base series of person-moments taken from the subjects' follow-up.
#
# Arguments:
#   ds         : source dataset (data frame, one row per subject)
#   event.var  : name of the event variable (1 = event)
#   t.var      : name of the event (or censoring) time variable
#   i.var      : name of the intervention (tx) variable
#   id.var     : name of the patient identifier variable
#   x.vars     : vector of names of regressor variables
#   b.c.ratio  : (integer) ratio, size of base series : case series
#   random     : if 1, base person-moments are drawn at random (subjects
#                sampled with probability proportional to their time, then
#                a uniform moment within that time); for any other value
#                they are taken at equally spaced points along the
#                cumulated person-time
#
# Returns a data frame with c + b rows (cases first, then base series) and
# columns i.var, id.var, x.vars, t.var, y (1 if the person-moment
# represents an event, 0 otherwise) and o (the offset log(B / b)).
create.case.base.series <- function(
    ds, event.var, t.var, i.var, id.var, x.vars, b.c.ratio, random) {
  follow.up <- ds[, t.var]
  n.subjects <- length(follow.up)        # no. of subjects
  B <- sum(follow.up)                    # total person-time in base
  n.cases <- sum(ds[, event.var])        # no. of cases (events)
  b <- b.c.ratio * n.cases               # size of base series
  offset <- log(B / b)  # offset so intercept = log(ID | x, t = 0)
  keep <- c(i.var, id.var, x.vars, t.var)

  if (random == 1) {
    who <- sample(n.subjects, b, replace = TRUE, prob = follow.up / B)
    b.series <- ds[who, keep]
    b.series$y <- 0
    b.series[, t.var] <- runif(b) * b.series[, t.var]
  } else {
    # Person-time accumulated before each subject; element k + 1 is the
    # total up to and including subject k.
    cum.time <- c(0, cumsum(follow.up))
    every.d.t <- B * seq_len(b) / (b + 1)
    # Subject whose stretch of the cumulated person-time holds each point.
    who <- findInterval(every.d.t, cum.time)
    b.series <- ds[who, keep]
    b.series$y <- 0
    # Time since that subject's own start.
    b.series[, t.var] <- every.d.t - cum.time[who]
  }
  b.series$o <- offset

  c.series <- ds[ds[[event.var]] == 1, keep]
  c.series$y <- 1
  c.series$o <- offset

  rbind(c.series, b.series)
}
#
# ovarian cancer dataset, example 5.11 in Collett 2nd edition #
#
setwd("/Users/jameshanley/Documents/work/osm/profileArticle/Rcode-hanley-miettinen/C3251/")

ds <- read.table("ovariancancerpatients.txt", header = TRUE)
nrow(ds)
summary(ds)

# Sum of log event times; added to the survreg log-likelihoods below
# (see top of page 177).
k <- sum(ds$status * log(ds$time))
	   #
library(survival)#
#
########## WEIBULL ######################################
#
# table 5.7#
#
# Weibull fits with no covariates, with age, and with age + treatment;
# each is followed by -2 * (log-likelihood + k).
fit.none <- survreg(Surv(time, status) ~ 1, data = ds, dist = "weibull")
fit.none
-2 * (fit.none$loglik + k)

fit.age <- survreg(Surv(time, status) ~ 1 + age, data = ds, dist = "weibull")
fit.age
-2 * (fit.age$loglik + k)

fit.age.treat <- survreg(Surv(time, status) ~ 1 + age + treat,
                         data = ds, dist = "weibull")
fit.age.treat
-2 * (fit.age.treat$loglik + k)

# conversions (p 178): from survreg's intercept and scale to lambda, gamma
# and the regression coefficients beta
mu <- fit.age.treat$coefficients[1]
sigma <- fit.age.treat$scale
lambda <- exp(-mu / sigma)
gamma <- 1 / sigma
c(mu, sigma, lambda, gamma)
beta <- -fit.age.treat$coefficients[2:3] / sigma
beta
log(lambda) + log(gamma)
#
# hanley and miettinen#
#
# Case-base series with 100 base person-moments per case, taken
# systematically (random = 0), plus log(time) as a regressor.
case.base.ds <- create.case.base.series(
  ds,
  event.var = "status", t.var = "time", i.var = "treat",
  id.var = "patient", x.vars = c("age"),
  b.c.ratio = 100, random = 0
)
case.base.ds$log.t <- log(case.base.ds$time)
case.base.ds[1:20, ]

# Logistic regression with the offset column `o`; log.t is the time term.
weibull.fit <- glm(y ~ age + treat + log.t, family = binomial,
                   offset = o, data = case.base.ds)
beta.hat <- weibull.fit$coefficients
beta.hat
#
# intercept = log(lambda*gamma) in Collett formulation#
#
###### GOMPERTZ ######################################
#
# Collett p192#
#
# lambda =1.706x10^(-6); beta.age=0.122; beta.treat = -0.848; theta.hat=0.00138.#
#
# hanley and miettinen#
#
# Same case-base data, with time itself (not log time) as the time term.
gompertz.fit <- glm(y ~ age + treat + time, family = binomial,
                    offset = o, data = case.base.ds)
beta.hat <- gompertz.fit$coefficients
beta.hat
exp(beta.hat)
