# R function to create case-base dataset for use in fitting
# parametric hazard functions via logistic regression
# see Hanley and Miettinen, Int J Biostatistics 2009

create.case.base.series=function(
   ds,event.var,t.var,i.var, id.var, x.vars, b.c.ratio,random) 
{

# user supplies...

 # ds         : source dataset
 # event.var  : event variable (1=event)  
 # t.var      : event (or censoring) time
 # i.var      : intervention (tx) variable
 # id.var     : patient identifier
 # x.vars     : vector of names of regressor variables
 # b.c.ratio  : (integer) ratio, size of base series : case series
 # random     : if 1 , 

# program calculates ...
 
 n.subjects = length(ds[,t.var]); # no. of subjects
 B = sum(ds[,t.var]);             # total person-time in base
 c = sum(ds[,event.var])          # no. of cases (events)
 b = b.c.ratio * c;               # size of base series  
 offset = log(B / b);             # offset so intercept = log(ID | x, t = 0 ) 

# & returns dataset with b+c rows of person-moments (p.m), x.vars, offset,
# and an indicator variable y (1 if p.m represents an evemt, 0 otherwise )
             
  if (random==1)
  {
    p = ds[,t.var]/B;
    who=sample(n.subjects, b, replace = TRUE, prob = p);
    b.series=ds[who,] ;
    b.series=b.series[,c(i.var,id.var,x.vars,t.var)] 
    b.series$y=0;
    b.series[,t.var] = runif(b)*b.series[,t.var];
    b.series$o = offset;
  }       	
  
   if (random != 1)
  {
    d.t=B/(b+1);
    p.sum=c(0);
    for (i in 1:n.subjects){
    	p.sum = c( p.sum, p.sum[i] + ds[i,t.var] )
    	}
    every.d.t = B*(1:b)/(b+1);
    who=findInterval(every.d.t,p.sum);  
    #print(who);
    b.series=ds[who,] ;
    b.series=b.series[,c(i.var,id.var,x.vars,t.var)]; 
    b.series$y=0;
    b.series[,t.var] = every.d.t - p.sum[who];
    b.series$o = offset;
  }       	
  
  c.series=ds[ ds[event.var]==1, ] ;
  c.series=c.series[,c(i.var,id.var,x.vars,t.var)] ; 
  c.series$y=1;
  c.series[,t.var] = c.series[,t.var];
  c.series$o = offset;   
  c.b.series = rbind(c.series,b.series); 
  return(c.b.series); 
}

# ovarian cancer dataset, example 5.11 in Collett 2nd edition 

setwd("/Users/jameshanley/Dropbox/work/osm/profileArticle/Rcode-hanley-miettinen/C3251/")
       
ds=read.table("ovariancancerpatients.txt",header=T);
length(ds[,1]) ; summary(ds)
k= sum(ds$status*log(ds$time)) ; # see top of page 177 
	   
library(survival)

########## WEIBULL #####################################

# table 5.7

fit.none=survreg(Surv(time,status) ~ 1, data=ds, dist="weibull");
fit.none; -2*(fit.none$loglik+k)
fit.age=survreg(Surv(time,status) ~ 1+age, data=ds, dist="weibull");
fit.age ; -2*(fit.age$loglik+k)
fit.age.treat=survreg(Surv(time,status) ~ 1+age+treat, data=ds, dist="weibull");
fit.age.treat ; -2*(fit.age.treat$loglik+k)

# conversions (p 178)

mu=fit.age.treat$coefficients[1] ; sigma=fit.age.treat$scale ;
lambda=exp(-mu/sigma) ; gamma=1/sigma ;
c(mu,sigma,lambda,gamma) 
beta=-fit.age.treat$coefficients[2:3]/sigma ; beta
log(lambda)+log(gamma)

# hanley and miettinen

case.base.ds=create.case.base.series(
   ds,event.var="status",t.var="time",i.var="treat", id.var="patient", 
   x.vars=c("age"), b.c.ratio=100,random=0) 
case.base.ds$log.t = log(case.base.ds$time) 
case.base.ds[1:20,]

weibull.fit = glm(y ~ age + treat + log.t,family=binomial, offset=o, data=case.base.ds)
beta.hat=weibull.fit$coefficients ; beta.hat

# intercept = log(lambda*gamma) in Collett formulation

###### GOMPERTZ #####################################

# Collett p192

# lambda =1.706x10^(-6); beta.age=0.122; beta.treat = -0.848; theta.hat=0.00138.

# hanley and miettinen

gompertz.fit = glm(y ~ age + treat + time, family=binomial, offset=o, data=case.base.ds)         
beta.hat=gompertz.fit$coefficients ; beta.hat
exp(beta.hat)

