Skip to contents

Benchmarking betnet v2.0

This workflow evaluates the accuracy of the betnet Julia package by simulating a full genomic and temporal dataset. Unlike the TransPhylo workflow, this simulation explicitly models DNA sequence evolution using seq-gen.

Step 1: Data Generation and Julia Inference

We simulate an outbreak and generate a 1Mb genome for each individual. The script then calls the betnet2.0.jl CLI to perform the Bayesian inference.

numsim <- 10
set.seed(42)

for (i in 1:numsim) {
    # 1. Simulate Outbreak Truth
    outbreak <- simulate_outbreak(infection_rate = 3.0, removal_rate = 3.0, target_size = 10)
    write.csv(outbreak, paste0("outbreak", i, ".csv"))
    numcase <- nrow(outbreak)

    # 2. Extract Temporal Data (Infection and Removal times)
    tempfile <- paste0("sim_temporal", i, ".csv")
    write.csv(outbreak[, c(1, 5, 7)], file = tempfile, row.names = FALSE)

    # 3. Simulate Genomic Sequence (DNA)
    timetree <- build_timetree(outbreak, plot = FALSE)
    phytree <- sim_phytree(timetree, murate = 10^-6)
    ape::write.tree(phytree, "tree.tre")
    
    seqfile <- paste0("seq", i)
    # Using seq-gen for HKY model evolution across a 1Mb genome
    system(paste("seq-gen -mHKY -l1000000 tree.tre >", seqfile))

    # 4. Calculate SNP Distance Matrix
    dna_data <- ape::read.dna(seqfile)
    dist_mat <- ape::dist.dna(dna_data, model = "N", as.matrix = TRUE)
    
    # Ensure indices match the temporal data
    idx <- order(as.numeric(row.names(dist_mat)))
    snp_mat <- dist_mat[idx, idx]
    snpfile <- paste0("sim_snp", i, ".csv")
    write.csv(snp_mat, file = snpfile, row.names = FALSE)
    
    # 5. Execute betnet Inference via Julia CLI
    betnet_script <- system.file("julia", "betnet2.0.jl", package = "transNetwork")
    outputfile <- paste("parameter", i, ".csv", sep="")
    betnet_cmd <- paste("julia", betnet_script, "-t", tempfile, "-s", snpfile, "-o", outputfile) 
    system(betnet_cmd)
}

Step 2: Accuracy Assessment

After the Julia MCMC chains complete, we read the posterior samples back into R using summary_betnet to compare the inferred infectors against the simulation truth.

betnet_accuracy <- numeric(numsim)

for (i in 1:numsim) {
    # Load Truth and Estimation
    outbreak_true <- read.csv(paste0("outbreak", i, ".csv"))[, -1]
    input_mcmc <- paste0("parameter", i, ".csv")
    
    # Extract the MAP (Maximum A Posteriori) transmission network
    outbreak_est <- summary_betnet(input_mcmc, 
                                   paste0("sim_snp", i, ".csv"),
                                   paste0("sim_temporal", i, ".csv"),
                                   plot = FALSE)$transmission
    
    # Calculate difference (0 indicates correct infector identification)
    diff_table <- outbreak_est - outbreak_true
    betnet_accuracy[i] <- sum(diff_table$infector == 0) / (nrow(outbreak_true) - 1)
}

cat("Mean betnet Accuracy:", mean(betnet_accuracy))