################################################################################### ######################### ####################### ######################### SECTION 1 ####################### ######################### ####################### ######################### CODE FOR BOOTSTRAP ROUTINE ####################### ######################### ####################### ################################################################################### bootstrap.data<-function(B,datafile="original.data.txt",file.base="boot", method="point"){ #Created by: Tiago Marques #Date (last update): 07-01-2005 #Purpose: Performs B bootstrap resamples and calls Distance to analysise the data in the file data.file.name # Notice that you have to have a command file, in the folder where R is, that is called file.base+".cmd.txt" # This file is produced by distance when analysing the original data in debug mode # The names for the diferent outputfiles, defined inside file.base+".cmd.txt", should have the same file.base prefix #Inputs: B: number of bootstrap resamples # file: datafile as produced by distance in debug mode # file.base: the prefix for name of the various file produced by cds # method: 'point' transect or 'line' transect #Returns: a data frame with B rows, in row h are the results of the h resample, and 24 columns, which are: # N's: by strata (7 columns) + total number of animals (in the distance sampling survey) # k's: number of samples by strata (7 columns) + total number of samples - so far just a control, it should be the same as the original survey # n's: number of detected animals by strata (7 columns)+ total number of detected animals library(svMisc) # for progress function to track progress # DEFINING THE FILE NAMES data.file.name<-paste(file.base,".data.txt",sep="") # read the data in data<-read.table(datafile,header=FALSE,na.strings="",sep = "",fill=TRUE) # create a vector of zeros to fill the data.frame with the results from this function fill.in<-rep(0,B) res.boot<-data.frame( Dhat.grp=fill.in, Dhat.ind=fill.in, mu=fill.in, Es=fill.in, LnL=fill.in, AIC=fill.in, N=fill.in, a1=fill.in, a2=fill.in, a3=fill.in, status=fill.in, n.detects=fill.in, D.ht.male=fill.in, D.ht.female=fill.in, sex.ratio=fill.in) m <- 0 #counter for the number of errors errors <- NULL #object to hold the status of analyis where errors occurs for(r in 1:B) { #for each resample progress(r, B) create.data.file.bootstrap(data,data.file.name) #creates the bootstrap data file for D5 results <- cds(file.base=file.base,ext.files=TRUE,bootstrap=TRUE) #runs D5 on the file created, using the apropriate cmd file if(results$status>2) { #if there was an error - do not know why but ignore analysis m <- m+1 #add 1 to the number of errors errors[m] <- results$status #while keeping a track of what these errors are } else { #if no error, collect the data # Reread bootstrap data file, keep only detections, # hang onto covariate coefficients boot.rep <- read.table(file=paste(file.base,".data.txt",sep=""), sep="\t", header=FALSE, col.names=c("strata","area","label","effort","distance","sex","cluster.size"),as.is=TRUE) boot.estimates <- compute.sigma.f0.Dht(results, boot.rep, method=method) for ( i.fields in 1:length(results)) { res.boot[r,i.fields] <- results[[i.fields]] } res.boot[r, i.fields+1] <- boot.estimates$num.detects res.boot[r, i.fields+2] <- boot.estimates$point.male res.boot[r, i.fields+3] <- boot.estimates$point.female res.boot[r, i.fields+4] <- boot.estimates$point.ratio # remove any temp files bootstrap <- TRUE file.names<-get.cds.file.names(file.base) if(bootstrap==FALSE) { if(is.null(file.base)) remove.files(file.names) } else {#if inside a bootstrap routine, always delete intermediate files, except the command file remove.files(file.names,bootstrap=bootstrap) } } } return(list(res.boot=res.boot,errors=errors)) } #----------------------------------- g.of.y <- function(y, sigma.sq){ exp(-y^2/(2*sigma.sq)) } compute.sigma.f0.Dht <- function(stats.file.results, datafile.covariates, method="line") { detects <- subset(datafile.covariates, !is.na(distance)) n.detects <- length(detects[,1]) coeff1 <- stats.file.results[[8]] coeff2 <- stats.file.results[[9]] coeff3 <- stats.file.results[[10]] # Hard-wired truncation distance and number of points surveyed for bustards w <- 500 k.points <- 133 total.effort <- 1130700 sq.meter.to.sq.km <- 1000 * 1000 sum.male <- 0 sum.female <- 0 for (i.detect in 1: n.detects) { this.cluster <- detects$cluster.size[i.detect] if (detects$sex[i.detect] == "M") { sigma.sq.male <- (coeff1 * exp(coeff3 * this.cluster )) ^2 # multiply h(0,i) or f(0,i) by s(i) a la eqn 3.32 Marques & Buckland(2004) if (method == "point" ) { sum.male <- sum.male + this.cluster / ( sigma.sq.male * ( 1 - exp(-w^2/(2*sigma.sq.male)))) } else { sum.male <- sum.male + this.cluster / integrate(g.of.y, 0, w, sigma.sq=sigma.sq.male)$value } } else { sigma.sq.female <- (coeff1 * exp(coeff2 + coeff3 * this.cluster )) ^2 if (method == "point" ) { sum.female <- sum.female + this.cluster / ( sigma.sq.female * ( 1 - exp(-w^2/(2*sigma.sq.female)))) } else { sum.female <- sum.female + this.cluster / integrate(g.of.y, 0, w, sigma.sq=sigma.sq.female)$value } } } # equations coded here approximate 3.45 removing A to estimate density if (method == "point" ) { D.ht.male <- 1/(2*k.points*pi) * sum.male * sq.meter.to.sq.km D.ht.female <- 1/(2*k.points*pi) * sum.female * sq.meter.to.sq.km } else { D.ht.male <- 1/(2*total.effort) * sum.male * sq.meter.to.sq.km D.ht.female <- 1/(2*total.effort) * sum.female * sq.meter.to.sq.km } sex.ratio <- D.ht.male / D.ht.female return(list(num.detects=n.detects, point.male=D.ht.male, point.female=D.ht.female, point.ratio=sex.ratio)) } #--------------------------------------------------------------------------------------- point.estimate <- function(cmdfile="point06.cmd.txt", statfile="point06.stat.txt", datafile="bust06.trunc.data.txt",method="line") { point <- run.cds(cmdfile) results <- read.stats.file(statfile) real.data <- read.table(file=datafile, sep="\t", header=FALSE, col.names=c("strata","area","label","effort", "distance","sex","cluster.size"),as.is=TRUE) point.values <- compute.sigma.f0.Dht(results, real.data, method="line") return(list(point.male=point.values$point.male, point.female=point.values$point.female, point.ratio=point.values$point.ratio)) } ################################################################################### ######################### ####################### ######################### SECTION 2 ####################### ######################### ####################### ######################### CREATING A BOOTSTRAP DATA FILE ####################### ######################### ####################### ################################################################################### #BOOTSTRAP PROCEDURE create.data.file.bootstrap<-function(data,filename="boot.data.txt"){ #Created by: Tiago Marques #Date (last update): 13-01-2005 #Purpose: This function is called by the function bootstrap.data and creates a resample data file, # based on an original file created by Distance # The resampling procedure should, at least for now, be the same as the one implemented by Distance # i.e. sampling inside each strata, using lines as the resampling unit, until the # same number of lines are obtained - however, need to check with Len why diferent errors # arise in this case - these comments will then be updated.... #Inputs: data - a data.frame with the data as created by D5 # filename - the name of the data file to be created #Returns: nothing, but creates a file in the current directory, named file.base+"data.txt", to be analysed by Distance num.points <- max(data$V3) resample <- sample(1:num.points, num.points, replace=TRUE) for (write.i in 1:num.points) { id <- resample[write.i] if (is.na(data[data$V3==id, 7])) { write.record <- paste(paste(data[data$V3==id,1], ".",sep=""), data[data$V3==id,2], paste(write.i, ". ", write.i, sep=""), data[data$V3==id,5], " ", " ", " ", sep="\t") # 3 blank fields at end when no detections at point } else { write.record <- paste(paste(data[data$V3==id,1], ".",sep=""), data[data$V3==id,2], paste(write.i, ". ", write.i, sep=""), data[data$V3==id,5], data[data$V3==id,6], data[data$V3==id,7], data[data$V3==id,8], sep="\t") } cat(write.record, file=filename, sep="\n", append=TRUE) } return() } ################################################################################### ######################### ####################### ######################### SECTION 3 ####################### ######################### ####################### ######################### THE RUNNING DISTANCE CODE ####################### ######################### Given by Len, some of it hacked ####################### ######################### ####################### ################################################################################### cds <- function (key, adj, L, w, A=NA, xi, zi, file.base,ext.files=TRUE,bootstrap=TRUE) { #Purpose: Driver function to run the mcds.exe engine from R #Updated by Tiago on 19/12/2004 so that it can take an external data and command input files #Useful if you want to do a non-standard bootstrap analysis of some data analyzed in Distance #and for which Distance is not capable of producing variance estimates #This function can be called inside a bootstrap procedure, as long as you update the #data file at each resample #Inputs: # key - vector string containing key functions # adj - vector string containing adjustment terms # L - total survey effort # A - survey area # xi - vector of perpendicular distances # zi - vector of cluster sizes # file.base - if specified and engine is cds, then the cds input and # output files are written into the current directory, with the # cds.file.base as a prefix and '.txt' as a suffix. E.g., setting # cds.file.base to 'cds' produces 'cds.cmd.txt', 'cds.data.txt', # 'cds.log.txt','cds.stat.txt','cds.plot.txt' and 'cds.boot.txt' # If not specified, these files are created in a temp location and # are deleted at the end. # ext.files - If true, then the funtion uses the external files 'file.base'+'.cms.'+txt' as command file # and 'file.base'+'.data.'+.txt' as data file. Typicaly these files would be the result of # runing Distance in debug mode, and should be placed in the working directory for R. # By default ext.files is false, so the function looks for the files produced by functions # 'create.data.file' and 'create.command.file' # You need to change the input comand file in order for the mcds engine to produce the files # that the function 'read.stats.file' expects, and that means that inside the command file you should define # file names with prefix = file.base # bootstrap - If true, procedure is being called inside a bootstrap routine, and intermediate files # are deleted at each loop step, except the command file #Outputs: list, containing # Nhat.grp, Nhat.ind, mu, nL, Es, LnL, AIC, status # Note - status is an integer: # 1=OK, 2=warnings, 3=errors, 4=file errors, 5=some other problem (e.g., program crash) #get input and output file names file.names<-get.cds.file.names(file.base) #if external not provided, create data file if(ext.files==FALSE) { create.data.file(file.names$data.file, L, A, xi, zi) } #if external not provided,create command file if(ext.files==FALSE) { create.command.file(file.names, key, adj, w) } #call cds engine run.status<-run.cds(file.names$cmd.file) # Eric changed this #harvest results from stats file res <- read.stats.file(file.names$stat.file) # do not clean out temporary files here because they are needed later in the # computation of the HT estimators of stratum-specific estimates #return results if(is.na(A)) { #If no Area was provided return(list(Dhat.grp=res$Dhat.grp, Dhat.ind=res$Dhat.ind, mu=res$mu, Es=res$Es, LnL=res$LnL, AIC=res$aic, N=res$N, a1=res$a1, a2=res$a2, a3=res$a3, status=run.status)) } else { #If Area was provided return(list(Nhat.grp=res$Dhat.grp*A, Nhat.ind=res$Dhat.ind*A, mu=res$mu, Es=res$Es, LnL=res$LnL, AIC=res$aic, a1=res$a1, a2=res$a2, a3=res$a3, status=run.status)) } } #---------------------------------------------------------------------------------------------- get.cds.file.names<-function(file.base=NULL) { #Purpose: returns a list of filenames given the file.base # If file.base is NULL filenames are in a temp directory # Otherwise, they are of the form file.base + # '.cmd'/'.data','.out', '.log','.stat','.boot','.plot' + '.txt' files.mid<-c("cmd","data","out", "log","stat","boot","plot") files.suffix<-"txt" if(is.null(file.base)) { #get temp file names file.names<-tempfile(files.mid) } else { file.names<-rep("",length(files.mid)) #and add the file base for(i in 1:length(files.mid)) { file.names[i]<-paste(file.base,".",files.mid[i],sep="") } } #add the appropriate suffix for(i in 1:length(files.mid)) { file.names[i]<-paste(file.names[i],files.suffix,sep=".") } return(list(cmd.file=file.names[1], data.file=file.names[2], out.file=file.names[3], log.file=file.names[4], stat.file=file.names[5], boot.file=file.names[6], plot.file=file.names[7])) } #---------------------------------------------------------------------------------------------- run.cds<-function(cmd.file.name) { #Purpose: runs the MCDS.exe engine and waits for it to finish # *Note* that mcds.exe needs to be in the working directory, or in the PATH windows environment # variable for this to work, as it makes no attempt to find the location of the file #Inputs: # cmd.file.name - name of the command file to run #Returns: # A status integer - 1=OK, 2=warnings, 3=errors, 4=file errors, 5=some other problem (e.g., program crash) command <- paste("mcds 0, \"", cmd.file.name,"\"",sep="") res<-system(command, intern=TRUE, invisible=TRUE) res<-as.integer(res) if(is.na(res)) res<-5 return(res) } #---------------------------------------------------------------------------------------------- read.stats.file<-function(stat.file.name) { # Author: Ingrid # Eric altered this to acquire coefficients from the covariates 05/01/2007 08:50:03 # Updates: hacked by Tiago on 05/01/2005, to include results by strata # and a few other intermediate statistics (see below on Returns) # Purpose: extracts results statistics from the MCDS stat file #Input: # stat.file.name - name of file to look in #Returns: list: # Dhat.grp - density of groups # Dhat.ind - density of individuals # mu - efective strip (half) width # nL - encounter rate # Es - mean cluster size # LnL - log-likelihood # AIC - Akaike Information Criterion # n - number of detected animals # k - number of transects # N - number of animals # Note2 - if stat.file.name doesn't list values should be null - can give a warning # ditto if any of the stats don't exist #read the file in and test that it has something in it lines.v<-readLines(stat.file.name) n.lines<-length(lines.v) if(n.lines==0) { stop ("Nothing to read!") } # go through each line, looking for the results we want and storing them in the appropriate place aic<-NULL;Dhat.grp<-NULL;Dhat.ind<-NULL;mu<-NULL;LnL<-NULL;Es<-NULL;N<-NULL;a1<-NULL;a2<-NULL;a3<-NULL for (line in 1:n.lines) { parsed.line<-split.line(lines.v[line]) if(parsed.line$ok) { if(parsed.line$module==2 & parsed.line$statistic==6){#effective strip (half) width line if(parsed.line$stratum==0) {mu[length(mu)+1]<-parsed.line$value} else{mu[parsed.line$stratum]<-parsed.line$value} } if(parsed.line$module==2 & parsed.line$statistic==7){#Akaike Information Criterion line aic<-parsed.line$value } if(parsed.line$module==2 & parsed.line$statistic==9){#log-likelihood line LnL<-parsed.line$value } if(parsed.line$module==2 & parsed.line$statistic==101){#covariate coefficient 1 a1<-parsed.line$value } if(parsed.line$module==2 & parsed.line$statistic==102){#covariate coefficient 2 a2<-parsed.line$value } if(parsed.line$module==2 & parsed.line$statistic==103){#covariate coefficient 3 a3<-parsed.line$value } if(parsed.line$module==3 & parsed.line$statistic==1){#mean cluster size line # using mean cluster size for now (statistic==1) - # if switch to cluster size regression will want statistic==3 if(parsed.line$stratum==0) {Es[length(Es)+1]<-parsed.line$value} else {Es[parsed.line$stratum]<-parsed.line$value} } if(parsed.line$module==4 & parsed.line$statistic==1){#density of groups line if(parsed.line$stratum==0) {Dhat.grp[length(Dhat.grp)+1]<-parsed.line$value} else {Dhat.grp[parsed.line$stratum]<-parsed.line$value} } if(parsed.line$module==4 & parsed.line$statistic==2){#density of individuals line if(parsed.line$stratum==0) {Dhat.ind[length(Dhat.ind)+1]<-parsed.line$value} else {Dhat.ind[parsed.line$stratum]<-parsed.line$value} } if(parsed.line$module==4 & parsed.line$statistic==3){#number of animals line if(parsed.line$stratum==0) {N[length(N)+1]<-parsed.line$value} else {N[parsed.line$stratum]<-parsed.line$value} } } } return(list(aic=aic, Dhat.grp=Dhat.grp, Dhat.ind=Dhat.ind, mu=mu, LnL=LnL, Es=Es,N=N, a1=a1, a2=a2,a3=a3)) } #---------------------------------------------------------------------------------------- split.line <- function(line) { #takes each line and returns the different values for stratum, samp, estimator, ... if(nchar(line)<6) stop ("This isn't a stats file!") stratum<-as.integer(substr(line,1,6)) if(is.na(stratum)) { #this means that ok=FALSE samp<-NULL estimator<-NULL module<-NULL statistic<-NULL value<-NULL cv<-NULL lcl<-NULL ucl<-NULL degrees.freedom<-NULL } else { ok=TRUE # Parse the rest of the line when ok is true samp <- as.integer(substr(line, 8, 13)) estimator<-as.integer(substr(line,14,15)) module<-as.integer(substr(line, 16,17)) statistic<-as.integer(substr(line,18,20)) value<-as.numeric(substr(line, 21,36)) cv<-as.numeric(substr(line, 37,45)) lcl<-as.numeric(substr(line, 46,60)) ucl<-as.numeric(substr(line, 61 ,75)) degrees.freedom<-as.integer(substr(line, 76,91)) } return(list(ok=ok,stratum=stratum,samp=samp, estimator=estimator, module=module, statistic=statistic, value=value, cv=cv, lcl=lcl, ucl=ucl, degrees.freedom=degrees.freedom)) } remove.files<-function(file.names,bootstrap=F) { #Purpose: removes the files listed in the vector file.names #Written by: Len Thomas 26/7/04 #Updated by: Tiago Marques 01-01-2005 so that the command file is not deleted (if used inside bootstrap loop) start<-1 if(bootstrap==TRUE) { start<-2#if used inside bootstrap loop, the command file is not deleted } for (i in start:length(file.names)) { if (file.exists(file.names[[i]])) file.remove(file.names[[i]]) } }