We start by loading the necessary libraries and defining the function
to calculate the probability that two nucleotides are identical after
\(t\) generations under the
Jukes-Cantor model.
options(tidyverse.quiet = TRUE)
library(tidyverse)
library(ggplot2)
jc <- function(lambda, t) {
return(1 - (3/4)*(1 - exp(-8*lambda*t/3)))
}
jc_dist <- function(q) {
return((-3/4)*log(1 - (4/3)*(1 - q)))
}
Now we’ll look at how the probability of identity decays over 10,000
generations when \(\lambda = 0.0001\),
0.001, and 0.01.
t <- seq(0, 10000, by = 1)
dat <- tibble(lambda = NA, t = NA, q = NA)
for (lambda in c(0.0001, 0.001, 0.01)) {
q <- jc(lambda, t)
tmp <- tibble(lambda = as.character(lambda),
t = t,
q = q)
dat <- rbind(dat, tmp)
}
dat <- dat %>% filter(!is.na(lambda))
p <- ggplot(dat, aes(x = t, y = q,
fill = lambda,
color = lambda)) +
geom_line() +
theme_bw() +
ylim(0, 1) +
xlab("Generation") +
ylab("Probability nucleotides are identical") +
scale_color_brewer(type = "seq",
palette = "YlGnBu")
p

Notice that the probability of identity decreases to 0.25, but no
further. Let’s look at the complementary plot of evolutionary distance
as a function of percent sequence similarity.
q <- seq(1.0, 0.251, by = -0.001)
d <- jc_dist(q)
dat <- tibble(q = q, d = d)
p <- ggplot(dat, aes(x = q, y = d)) +
geom_line() +
theme_bw() +
xlim(0.25, 1) +
xlab("Percent sequence similarity") +
ylab("Jukes-Cantor distance")
p

LS0tCnRpdGxlOiAiSWxsdXN0cmF0aW5nIG51Y2xlb3RpZGUgc2VxdWVuY2UgZGl2ZXJnZW5jZSB1bmRlciB0aGUgSnVrZXMtQ2FudG9yIG1vZGVsIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpXZSBzdGFydCBieSBsb2FkaW5nIHRoZSBuZWNlc3NhcnkgbGlicmFyaWVzIGFuZCBkZWZpbmluZyB0aGUgZnVuY3Rpb24gdG8gY2FsY3VsYXRlIHRoZSBwcm9iYWJpbGl0eSB0aGF0IHR3byBudWNsZW90aWRlcyBhcmUgaWRlbnRpY2FsIGFmdGVyICR0JCBnZW5lcmF0aW9ucyB1bmRlciB0aGUgSnVrZXMtQ2FudG9yIG1vZGVsLgoKYGBge3J9Cm9wdGlvbnModGlkeXZlcnNlLnF1aWV0ID0gVFJVRSkKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZ2dwbG90MikKCmpjIDwtIGZ1bmN0aW9uKGxhbWJkYSwgdCkgewogcmV0dXJuKDEgLSAoMy80KSooMSAtIGV4cCgtOCpsYW1iZGEqdC8zKSkpCn0KCmpjX2Rpc3QgPC0gZnVuY3Rpb24ocSkgewogIHJldHVybigoLTMvNCkqbG9nKDEgLSAoNC8zKSooMSAtIHEpKSkKfQpgYGAKCk5vdyB3ZSdsbCBsb29rIGF0IGhvdyB0aGUgcHJvYmFiaWxpdHkgb2YgaWRlbnRpdHkgZGVjYXlzIG92ZXIgMTAsMDAwIGdlbmVyYXRpb25zIHdoZW4gJFxsYW1iZGEgPSAwLjAwMDEkLCAwLjAwMSwgYW5kIDAuMDEuCgpgYGB7cn0KdCA8LSBzZXEoMCwgMTAwMDAsIGJ5ID0gMSkKZGF0IDwtIHRpYmJsZShsYW1iZGEgPSBOQSwgdCA9IE5BLCBxID0gTkEpCmZvciAobGFtYmRhIGluIGMoMC4wMDAxLCAwLjAwMSwgMC4wMSkpIHsKICBxIDwtIGpjKGxhbWJkYSwgdCkKICB0bXAgPC0gdGliYmxlKGxhbWJkYSA9IGFzLmNoYXJhY3RlcihsYW1iZGEpLAogICAgICAgICAgICAgICAgdCA9IHQsCiAgICAgICAgICAgICAgICBxID0gcSkKICBkYXQgPC0gcmJpbmQoZGF0LCB0bXApCn0KZGF0IDwtIGRhdCAlPiUgZmlsdGVyKCFpcy5uYShsYW1iZGEpKQoKcCA8LSBnZ3Bsb3QoZGF0LCBhZXMoeCA9IHQsIHkgPSBxLCAKICAgICAgICAgICAgICAgICAgICAgZmlsbCA9IGxhbWJkYSwgCiAgICAgICAgICAgICAgICAgICAgIGNvbG9yID0gbGFtYmRhKSkgKwogIGdlb21fbGluZSgpICsKICB0aGVtZV9idygpICsKICB5bGltKDAsIDEpICsKICB4bGFiKCJHZW5lcmF0aW9uIikgKwogIHlsYWIoIlByb2JhYmlsaXR5IG51Y2xlb3RpZGVzIGFyZSBpZGVudGljYWwiKSArCiAgc2NhbGVfY29sb3JfYnJld2VyKHR5cGUgPSAic2VxIiwKICAgICAgICAgICAgICAgICAgICAgcGFsZXR0ZSA9ICJZbEduQnUiKQpwCmBgYAoKTm90aWNlIHRoYXQgdGhlIHByb2JhYmlsaXR5IG9mIGlkZW50aXR5IGRlY3JlYXNlcyB0byAwLjI1LCBidXQgbm8gZnVydGhlci4gTGV0J3MgbG9vayBhdCB0aGUgY29tcGxlbWVudGFyeSBwbG90IG9mIGV2b2x1dGlvbmFyeSBkaXN0YW5jZSBhcyBhIGZ1bmN0aW9uIG9mIHBlcmNlbnQgc2VxdWVuY2Ugc2ltaWxhcml0eS4KCmBgYHtyfQpxIDwtIHNlcSgxLjAsIDAuMjUxLCBieSA9IC0wLjAwMSkKZCA8LSBqY19kaXN0KHEpCmRhdCA8LSB0aWJibGUocSA9IHEsIGQgPSBkKQpwIDwtIGdncGxvdChkYXQsIGFlcyh4ID0gcSwgeSA9IGQpKSArCiAgZ2VvbV9saW5lKCkgKwogIHRoZW1lX2J3KCkgKwogIHhsaW0oMC4yNSwgMSkgKwogIHhsYWIoIlBlcmNlbnQgc2VxdWVuY2Ugc2ltaWxhcml0eSIpICsKICB5bGFiKCJKdWtlcy1DYW50b3IgZGlzdGFuY2UiKQpwCmBgYAo=