Commit

added subclone penalty; tuned Gaussian likelihood
gavinha committed May 8, 2020
1 parent 08692df commit 3102297
Showing 6 changed files with 140 additions and 98 deletions.
51 changes: 30 additions & 21 deletions R/EM.R
@@ -31,7 +31,7 @@ runEM <- function(copy, chr, chrInd, param, maxiter, verbose = TRUE,
KS <- nrow(param$jointStates)#K ^ S
N <- nrow(copy)
rho <- matrix(0, KS, N)
lambdas <- array(0, dim = c(K, S, maxiter)) # State Variances
lambdas <- array(0, dim = c(nrow(na.omit(param$lambda)), S, maxiter)) # State Variances
#covars <- rep(list(NULL), maxiter) #State covariance matrices
vars <- array(0, dim = c(KS, S, maxiter)) #array(0, dim = c(K, S, maxiter))
phi <- matrix(NA, length(param$phi_0), maxiter) # Tumour ploidy
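The array dimension above now uses nrow(na.omit(param$lambda)) because lambda rows for ignored subclone states are masked with NaN elsewhere in this commit (see getDefaultParameters in R/segmentation.R below). A minimal sketch, not part of the diff, of what na.omit does to such a matrix:
lambda <- matrix(c(1, 2, NaN, 3, 4, NaN), ncol = 2)  # toy values; last row masked like an ignored subclone state
nrow(lambda)            # 3 rows in total
nrow(na.omit(lambda))   # 2 rows actually used to size the lambdas array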
@@ -63,7 +63,7 @@ runEM <- function(copy, chr, chrInd, param, maxiter, verbose = TRUE,
n[, i] <- param$n_0
sp[, i] <- param$sp_0
phi[, i] <- param$phi_0
lambdas[, , i] <- param$lambda #matrix(param$lambda, nrow = K, ncol = S, byrow = TRUE)
lambdas[, , i] <- na.omit(param$lambda) #matrix(param$lambda, nrow = K, ncol = S, byrow = TRUE)
lambdasKS <- as.matrix(na.omit(expand.grid(as.data.frame(lambdas[, , i])))) #as.matrix(expand.grid(rep(list(param$lambda), S)))
#covars[[i]] <- param$covar
if (param$likModel == "Gaussian"){
@@ -73,6 +73,10 @@ runEM <- function(copy, chr, chrInd, param, maxiter, verbose = TRUE,
mus[, , i] <- as.matrix(get2and3ComponentMixture(param$jointCNstates, param$jointSCstatus, n[, i], sp[, i], phi[, i]))

# Likelihood #
# if (!grepl("gauss", param$likModel, ignore.case = TRUE)){
# param$likModel <- "Gaussian"
# message("Switching to Gaussian likelihood model.")
# }
if (param$likModel == "t"){
for (ks in 1:KS) {
probs <- tdistPDF(copy, mus[ks, , i], lambdasKS[ks, ], param$nu)
@@ -103,7 +107,7 @@ runEM <- function(copy, chr, chrInd, param, maxiter, verbose = TRUE,
for (j in 1:length(chrsI)) {
I <- intersect(chrsI[[j]], which(chrInd))
if (length(I) > 0){
output <- .Call("forward_backward", piG[, i - 1], param$A, py[, I],PACKAGE = "HMMcopy")
output <- .Call("forward_backward", piG[, i - 1], A, py[, I],PACKAGE = "HMMcopy")
rho[, I] <- output$rho
loglik[i] <- loglik[i] + output$loglik
Zcounts <- Zcounts + t(colSums(aperm(output$xi, c(3, 2, 1))))
@@ -143,18 +147,19 @@ runEM <- function(copy, chr, chrInd, param, maxiter, verbose = TRUE,
ptm.em <- proc.time()
output <- estimateTParamsMap(copy[chrInd, ], n[, i - 1], sp[, i - 1], phi[, i - 1],
lambdas[, , i - 1], piG[, i - 1], A, param,
rho[, chrInd], Zcounts,
rho[, chrInd], Zcounts, loglik[i],
estimateNormal = estimateNormal, estimatePloidy = estimatePloidy,
estimatePrecision = estimatePrecision, estimateTransition = estimateTransition,
estimateInitDist = estimateInitDist, estimateSubclone = estimateSubclone)

lambdas[, , i] <- output$lambda
lambdasKS <- as.matrix(na.omit(expand.grid(as.data.frame(lambdas[, , i]))))
if (verbose == TRUE) {

for (s in 1:S){
message("Sample", s, " n=", signif(output$n[s], digits = 4), ", sp=", signif(output$sp[s], digits = 4),
", phi=", signif(output$phi[s], digits = 4),
", lambda=", paste0(signif(output$lambda[, s], digits=5), collapse = ","),
", lambda=", paste0(signif(output$lambda, digits=5), collapse = ","),
", F=", signif(output$F, digits=4))
}
}
@@ -203,7 +208,7 @@ runEM <- function(copy, chr, chrInd, param, maxiter, verbose = TRUE,
}
if ((abs(loglik[i] - loglik[i - 1]) / abs(loglik[i])) < likChangeConvergence){#} && loglik[i] > loglik[i - 1]) {
message("runEM iter", i-1, " EM Converged")
converged = 0
converged = 1
}
if (loglik[i] < loglik[i - 1]){
#message("LIKELIHOOD DECREASED!!!")
@@ -381,6 +386,7 @@ getNormLik <- function(copy, mus, varsKS, sw){
}
probs <- apply(y, 1, prod)
}))
py[is.na(py)] <- 1
return(py)
}
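The added py[is.na(py)] <- 1 above makes missing bins uninformative before the forward-backward pass: an emission probability of 1 for every state neither favours nor penalizes any state. A minimal sketch, not part of the diff, with hypothetical values:
emission <- c(0.2, NA, 0.7)      # hypothetical per-bin emission probabilities for one state
emission[is.na(emission)] <- 1   # same idea as py[is.na(py)] <- 1
sum(log(emission))               # the missing bin contributes log(1) = 0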

@@ -424,7 +430,7 @@ invWishartpdflog <- function(covar, psi, nu){
}

estimateTParamsMap <- function(D, n_prev, sp_prev, phi_prev, lambda_prev, pi_prev, A_prev,
params, rho, Zcounts,
params, rho, Zcounts, loglik,
estimateNormal = TRUE, estimatePloidy = TRUE,
estimatePrecision = TRUE, estimateInitDist = TRUE,
estimateTransition = TRUE, estimateSubclone = TRUE,
@@ -443,6 +449,7 @@ estimateTParamsMap <- function(D, n_prev, sp_prev, phi_prev, lambda_prev, pi_pre
n_hat <- n_prev
sp_hat <- sp_prev
phi_hat <- phi_prev
lambda_prev <- na.omit(lambda_prev)
lambda_hat <- lambda_prev
pi_hat <- pi_prev
A_hat <- A_prev
@@ -464,7 +471,7 @@ estimateTParamsMap <- function(D, n_prev, sp_prev, phi_prev, lambda_prev, pi_pre
suppressWarnings(
estNorm <- optim(n_prev, fn = completeLikelihoodFun, pType = rep("n", S), n = n_prev, sp = sp_prev, phi = phi_prev,
lambda = lambda_prev, piG = pi_hat, A = A_hat,
params = params, D = D, rho = rho, Zcounts = Zcounts,
params = params, D = D, rho = rho, Zcounts = Zcounts, lik = loglik,
estimateNormal = estimateNormal, estimatePloidy = estimatePloidy,
estimatePrecision = estimatePrecision, estimateInitDist = estimateInitDist,
estimateTransition = estimateTransition, estimateSubclone = estimateSubclone,
@@ -479,7 +486,7 @@ estimateTParamsMap <- function(D, n_prev, sp_prev, phi_prev, lambda_prev, pi_pre
suppressWarnings(
estSubclone <- optim(sp_prev, fn = completeLikelihoodFun, pType = rep("sp", S), n = n_hat, sp = sp_prev,
phi = phi_prev, lambda = lambda_prev, piG = pi_hat, A = A_hat,
params = params, D = D, rho = rho, Zcounts = Zcounts,
params = params, D = D, rho = rho, Zcounts = Zcounts, lik = loglik,
estimateNormal = estimateNormal, estimatePloidy = estimatePloidy,
estimatePrecision = estimatePrecision, estimateInitDist = estimateInitDist,
estimateTransition = estimateTransition, estimateSubclone = estimateSubclone,
@@ -494,7 +501,7 @@ estimateTParamsMap <- function(D, n_prev, sp_prev, phi_prev, lambda_prev, pi_pre
suppressWarnings(
estPhi <- optim(phi_prev, fn = completeLikelihoodFun, pType = rep("phi", length(phi_prev)), n = n_hat, sp = sp_hat,
phi = phi_prev, lambda = lambda_prev, piG = pi_hat, A = A_hat,
params = params, D = D, rho = rho, Zcounts = Zcounts,
params = params, D = D, rho = rho, Zcounts = Zcounts, lik = loglik,
estimateNormal = estimateNormal, estimatePloidy = estimatePloidy,
estimatePrecision = estimatePrecision, estimateInitDist = estimateInitDist,
estimateTransition = estimateTransition, estimateSubclone = estimateSubclone,
@@ -509,7 +516,7 @@ estimateTParamsMap <- function(D, n_prev, sp_prev, phi_prev, lambda_prev, pi_pre
suppressWarnings(
estLambda <- optim(c(lambda_prev), fn = completeLikelihoodFun, pType = rep("lambda", K*S), n = n_hat, sp = sp_hat,
phi = phi_hat, lambda = lambda_prev, piG = pi_hat, A = A_hat,
params = params, D = D, rho = rho, Zcounts = Zcounts,
params = params, D = D, rho = rho, Zcounts = Zcounts, lik = loglik,
estimateNormal = estimateNormal, estimatePloidy = estimatePloidy,
estimatePrecision = estimatePrecision, estimateInitDist = estimateInitDist,
estimateTransition = estimateTransition, estimateSubclone = estimateSubclone,
@@ -710,7 +717,9 @@ varUpdateEqn <- function(n, sp, phi, jointStates, jointCNstates, jointSCstatus,
# }
# term1 <- term1 - 2 * betaVar
# term2 <- term2 - 2 * (alphaVar + 1)
term1 <- c - 2*b*mus + a*mus^2 + 2*betaVar
#term1 <- c - 2*b*mus + a*mus^2 + 2*betaVar
d <- b/a
term1 <- c - 2*b*d + a*d^2 + 2*betaVar
term2 <- a + 2*(alphaVar + 1)
var_hat <- term1 / term2
return(var_hat)
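One reading of the change above (a sketch under assumptions, not the package function): with responsibility-weighted sufficient statistics a = sum(rho), b = sum(rho * x), c = sum(rho * x^2), the new term1 equals sum(rho * (x - d)^2) + 2*betaVar with d = b/a the weighted mean, i.e. the usual MAP update of a variance under an inverse-gamma(alphaVar, betaVar) prior:
map_var <- function(x, r, alphaVar, betaVar) {
  # x: observations for one state/sample; r: responsibilities (rho); illustrative helper only
  a <- sum(r); b <- sum(r * x); c <- sum(r * x^2)
  d <- b / a                                       # responsibility-weighted mean
  (c - 2 * b * d + a * d^2 + 2 * betaVar) /        # = sum(r * (x - d)^2) + 2 * betaVar
    (a + 2 * (alphaVar + 1))
}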
@@ -765,7 +774,7 @@ stLikelihood <- function(n, sp, phi, lambda, params, D, rho){
## length of x will depend on which parameters are being estimated
## x values should be same as n, phi, lambda, pi, but may not necessarily
## include all of those variables
completeLikelihoodFun <- function(x, pType, n, sp, phi, lambda, piG, A, params, D, rho, Zcounts,
completeLikelihoodFun <- function(x, pType, n, sp, phi, lambda, piG, A, params, D, rho, Zcounts, lik,
estimateNormal = TRUE, estimatePloidy = TRUE,
estimatePrecision = TRUE, estimateTransition = FALSE,
estimateInitDist = TRUE, estimateSubclone = TRUE){
@@ -774,7 +783,7 @@ completeLikelihoodFun <- function(x, pType, n, sp, phi, lambda, piG, A, params,
S <- params$numberSamples
K <- length(params$ct)

lambda <- matrix(lambda, ncol = S, byrow = FALSE)
lambda <- na.omit(matrix(lambda, ncol = S, byrow = FALSE))

if (estimatePrecision && sum(pType == "lambda") > 0){
lambda <- matrix(x[pType == "lambda"], ncol = S, byrow = FALSE)
@@ -822,6 +831,7 @@ priorProbs <- function(n, sp, phi, lambda, var, piG, A, params,
if (estimateTransition){
priorA <- sapply(1:KS, function(ks){
dirichletpdflog(A[ks, ], params$dirPrior[ks, ])
#dirichletpdflog(params$A_prior[ks, ], A[ks, ])
})
}
priorA <- sum(priorA)
@@ -843,14 +853,14 @@ priorProbs <- function(n, sp, phi, lambda, var, piG, A, params,
if (estimateVar){
for (s in 1:S){
priorVar <- priorVar +
sum(invgammapdflog(as.data.frame(var[, s]), params$alphaVar[, s], params$betaVar[s]))
sum(invgammapdflog(as.data.frame(var[, s]), params$alphaVar[, s], params$betaVar[s]), na.rm = TRUE)
}
}
}else{ # param$likModel == "t"
if (estimatePrecision){
for (s in 1:S){
priorLambda <- priorLambda +
sum(gammapdflog(as.data.frame(lambda)[, s], params$alphaLambda, params$betaLambda[, s]))
sum(gammapdflog(as.data.frame(lambda)[, s], params$alphaLambda[s], params$betaLambda[, s]), na.rm = TRUE)
}
priorLambda <- as.numeric(priorLambda)
}
@@ -879,11 +889,10 @@ estimatePrecisionParamMap <- function(lambda, n, phi, params, D, rho){

estimateMixWeightsParamMap <- function(rho, kappa) {
K <- nrow(rho)
#pi <- (rho[, 1] + kappa - 1) / (sum(rho[, 1]) + sum(kappa) - K)
pi <- (rowSums(rho, na.rm = TRUE) + kappa - 1) /
# (ncol(rho) + sum(kappa) - K)
(sum(rowSums(rho, na.rm = TRUE)) + sum(kappa) - K)

pi <- (rho[, 1] + kappa - 1) #/ (sum(rho[, 1]) + sum(kappa) - K)
pi <- pi / sum(pi)
#pi <- (rowSums(rho, na.rm = TRUE) + kappa - 1) /
# (sum(rowSums(rho, na.rm = TRUE)) + sum(kappa) - K)
return(pi)
}
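The rewritten update above is a Dirichlet-MAP estimate built from the first-bin responsibilities, renormalized explicitly instead of divided by the pooled denominator that is now commented out. A minimal sketch with toy values (not the package defaults):
rho1   <- c(0.10, 0.65, 0.25)   # responsibilities of each joint state at the first bin
kappa  <- c(75, 800, 75)        # Dirichlet pseudo-counts on the initial state distribution
pi_hat <- rho1 + kappa - 1      # unnormalized MAP numerator, as in the updated code
pi_hat <- pi_hat / sum(pi_hat)  # renormalize so the initial distribution sums to 1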

22 changes: 12 additions & 10 deletions R/plotting.R
@@ -109,16 +109,16 @@ plotSolutions <- function(hmmResults.cor, tumour_copy, chrs, outDir, counts,
try(plotCovarBias(counts[[s]], covar = "gc", before = "reads",
after = "cor.gc", fit = "gc.fit", xlab = "GC Content",
pch = 20, cex = 0.5, mfrow = NULL),
silent=TRUE)
silent = TRUE)
try(plotCovarBias(counts[[s]], covar = "map", before = "cor.gc",
after = "cor.map", fit = NULL, xlab = "Mappability Score",
pch = 20, cex = 0.5, mfrow = NULL),
silent=TRUE)
silent = TRUE)

try(plotCovarBias(counts[[s]], covar = "repTime", before = "cor.map",
after = "cor.rep", fit = "rep.fit", xlab = "Replication Timing",
pch = 20, cex = 0.5, mfrow = NULL),
silent=TRUE)

silent = TRUE)
dev.off()

### PLOT TPDF ##
Expand Down Expand Up @@ -555,12 +555,14 @@ plotCovarBias <- function(correctOutput, covar = "gc",
ylab = "Uncorrected", xlab = xlab,
...)
# plot curve fit line
if (!is.null(fit)){
domain <- c(min(counts[[covar]], na.rm = TRUE), max(counts[[covar]], na.rm = TRUE))
fit.covar <- correctOutput[[fit]]
i <- seq(domain[1], domain[2], by = 0.001)
y <- predict(fit.covar, i)
lines(i, y, type="l", lwd=1, col="red")
if (!is.null(fit)){ # want to look at fit
if (!is.null(correctOutput[[fit]])){ # actually have the fit object/model as list element
domain <- c(min(counts[[covar]], na.rm = TRUE), max(counts[[covar]], na.rm = TRUE))
fit.covar <- correctOutput[[fit]]
i <- seq(domain[1], domain[2], by = 0.001)
y <- predict(fit.covar, i)
lines(i, y, type="l", lwd=1, col="red")
}
}

# select points to show (after)
Expand Down
79 changes: 41 additions & 38 deletions R/segmentation.R
@@ -125,8 +125,8 @@ getTransitionMatrix <- function(K, e, strength){
}

getDefaultParameters <- function(x, maxCN = 5, ct.sc = c(1,3), n_0 = 0.5, ploidy_0 = 2,
normalIgnoreSC = 0.9,
e = 0.9999999, e.sameState = 10, strength = 10000000,
normal2IgnoreSC = 0.9, e = 0.9999999, e.subclone = 0.1,
e.sameState = 50, strength = 10000000,
includeHOMD = FALSE, likModel = "t"){
if (includeHOMD){
ct <- 0:maxCN
@@ -144,7 +144,7 @@ getDefaultParameters <- function(x, maxCN = 5, ct.sc = c(1,3), n_0 = 0.5, ploidy
nu = 2.1,
kappa = rep(75, length(ct)),
alphaLambda = 5,
betaVar = 1,
betaVar = 2,
likModel = likModel
)
K <- length(param$ct)
@@ -183,7 +183,7 @@ getDefaultParameters <- function(x, maxCN = 5, ct.sc = c(1,3), n_0 = 0.5, ploidy
#param$var <- t(replicate(K, diag(covar$covar)))
param$var <- matrix(apply(x, 2, var, na.rm=T), ncol = S, nrow = K, byrow = TRUE)
param$betaVar <- rep(param$betaVar, S)
alphaVar <- param$betaVar / (apply(x, 2, var, na.rm = TRUE) / sqrt(K))
alphaVar <- 1 / (apply(x, 2, var, na.rm = TRUE) / sqrt(K) ^ 2)
param$alphaVar <- matrix(alphaVar, ncol = S, nrow = KS, byrow = TRUE)
}
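The retuned alphaVar above is the shape of the inverse-gamma prior placed on the state variances and evaluated by invgammapdflog in priorProbs. For reference, a minimal sketch of the standard inverse-gamma log-density, assuming that is the form invgammapdflog implements:
invgamma_logpdf <- function(sigma2, alpha, beta) {
  # log p(sigma2 | alpha, beta) for an inverse-gamma prior on a variance
  alpha * log(beta) - lgamma(alpha) - (alpha + 1) * log(sigma2) - beta / sigma2
}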

@@ -220,59 +220,62 @@ getDefaultParameters <- function(x, maxCN = 5, ct.sc = c(1,3), n_0 = 0.5, ploidy
# Initialize transition matrix to the prior
txn <- getTransitionMatrix(K ^ S, e, strength)
## set higher transition probs for same CN states across samples ##
# joint states where at least "tol" fraction of samples with the same CN state
# joint states where at least "e.sameState" fraction of samples share the same CN state
#apply(param$jointCNstates, 1, function(x){ sum(duplicated(as.numeric(x))) > 0 })
cnStateDiff <- apply(param$jointCNstates, 1, function(x){ (abs(max(x) - min(x)))})
if (e.sameState > 0 & S > 1){
txn$A[, cnStateDiff == 0] <- txn$A[, cnStateDiff == 0] * e.sameState * K
txn$A[, cnStateDiff >= 3] <- txn$A[, cnStateDiff >=3] / e.sameState / K
}
for (i in 1:nrow(txn$A)){
for (j in 1:ncol(txn$A)){
if (i == j){
txn$A[i, j] <- e
}
}
}
txn$A <- normalize(txn$A)
param$A <- txn$A
param$dirPrior <- txn$A * strength[1]
param$A[, param$ct.sc.status] <- param$A[, param$ct.sc.status] / 10
param$A <- normalize(param$A)
param$dirPrior[, param$ct.sc.status] <- param$dirPrior[, param$ct.sc.status] / 10

if (includeHOMD){
param$kappa <- rep(75, K ^ S)
param$kappa[cnStateDiff == 0] <- param$kappa[cnStateDiff == 0] + 125
param$kappa[cnStateDiff >=3] <- param$kappa[cnStateDiff >=3] - 50
param$kappa[which(rowSums(param$jointCNstates==2) == S)] <- 800

# penalize homozygous deletion state
if (includeHOMD){
K <- length(param$ct)
param$A[1, 2:K] <- param$A[1, 2:K] * 1e-5; param$A[2:K, 1] <- param$A[2:K, 1] * 1e-5;
param$A[1, 1] <- param$A[1, 1] * 1e-5
param$A <- normalize(param$A); param$dirPrior <- param$A * param$strength
txn$A[1, 2:K] <- txn$A[1, 2:K] * 1e-5;
txn$A[2:K, 1] <- txn$A[2:K, 1] * 1e-5;
txn$A[1, 1] <- txn$A[1, 1] * 1e-5
}

param$kappa <- rep(75, K ^ S)
param$kappa[cnStateDiff == 0] <- param$kappa[cnStateDiff == 0] + 125
param$kappa[cnStateDiff >=3] <- param$kappa[cnStateDiff >=3] - 50
param$kappa[which(rowSums(param$jointCNstates==2) == S)] <- 800
# for (i in 1:nrow(txn$A)){
# txn$A[i, i] <- e
# }

# penalize transitions into subclonal states
ind.bothSC <- rowSums(param$jointSCstatus) == S
txn$A[, ind.bothSC] <- txn$A[, ind.bothSC] / (e.subclone * K)

txn$A <- normalize(txn$A)
param$A <- txn$A
param$A_prior <- param$A
param$dirPrior <- param$A * strength[1]
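This block is the subclone penalty named in the commit message: columns of the transition matrix leading into joint states that are subclonal in every sample are divided by e.subclone * K before the rows are renormalized and copied into param$A, param$A_prior and param$dirPrior. A toy sketch of the same operation, with illustrative sizes and values only:
A <- matrix(1, nrow = 4, ncol = 4)                 # toy transition weights
ind.bothSC <- c(FALSE, FALSE, TRUE, TRUE)          # joint states subclonal in every sample
A[, ind.bothSC] <- A[, ind.bothSC] / (0.1 * 49)    # e.subclone * K in the package code
A <- A / rowSums(A)                                # renormalize rows, as normalize(txn$A) does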

# ignore subclones for sample with n >= normalIgnoreSC
# ignore subclones for sample with n >= normal2IgnoreSC
indKS <- 1:nrow(param$jointSCstatus) # use all joint states
indK <- 1:length(param$ct.sc.status)
if (sum(n_0 >= normalIgnoreSC) > 0){
for (i in which(n_0 >= normalIgnoreSC)){
# keep states excluding subclones for samples with n >= normalIgnoreSC
if (sum(n_0 >= normal2IgnoreSC) > 0){
for (i in which(n_0 >= normal2IgnoreSC)){
# keep states excluding subclones for samples with n >= normal2IgnoreSC
indKS <- intersect(indKS, which(!param$jointSCstatus[, i]))
indK <- intersect(indK, which(param$ct.sc.status))
param$lambda[indK, i] <- NaN
param$betaLambda[indK, i] <- NaN
param$var[indK, i] <- NaN
if (likModel == "Gaussian"){
param$var[indK, i] <- NaN
}
}

param$kappa <- param$kappa[indKS]
param$alphaVar <- param$alphaVar[indKS, ]
param$jointStates <- param$jointStates[indKS, ]
param$jointCNstates <- param$jointCNstates[indKS, ]
param$jointSCstatus <- param$jointSCstatus[indKS, ]
param$A <- param$A[indKS, indKS]
param$dirPrior <- param$dirPrior[indKS, indKS]
param$alphaVar <- param$alphaVar[indKS, , drop = FALSE]
param$jointStates <- param$jointStates[indKS, , drop = FALSE]
param$jointCNstates <- param$jointCNstates[indKS, , drop = FALSE]
param$jointSCstatus <- param$jointSCstatus[indKS, , drop = FALSE]
param$A <- normalize(param$A[indKS, indKS])
param$A_prior <- param$A
param$dirPrior <- param$A * strength[1]
}

return(param)
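The subsetting in this block now uses drop = FALSE, so keeping a single joint state still returns a one-row matrix rather than a bare vector. A minimal illustration of why that matters:
m <- matrix(1:6, nrow = 3)
dim(m[1, ])                  # NULL: a single row is silently dropped to a vector
dim(m[1, , drop = FALSE])    # c(1, 2): the matrix shape is preserved for downstream code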