This figure contains data from many different experiments that were used to optimize the protocol for ORBIT. Data figures were made in R notebooks and exported as pdfs. Cosmetic improvements were made in Adobe Illustrator. Note that Figure 2A is a diagram that was made in Adobe Illustrator.
Setup packages and plotting for the notebook:
# Check packages
source("../tools/package_setup.R")
# Load packages
library(tidyverse)
library(cowplot)
library(kableExtra)
# Chunk defaults for the rendered notebook: code is echoed, messages and
# warnings are hidden, and figures are centered and rendered at 2x resolution.
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 60),
  tidy = FALSE,
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  fig.align = "center",
  fig.retina = 2
)
# Load the project's plotting tools (presumably where theme_notebook() and
# theme_figure(), used later in this notebook, are defined -- TODO confirm).
source("../tools/plotting_tools.R")
#Modify the plot theme
theme_set(theme_notebook())

This experiment used a ∆galK targeting oligo with a pInt_kanR integrating plasmid and tested different helper plasmid induction schemes. Let’s first read in the data.
# Load the colony counts for each helper-plasmid induction scheme. Efficiency
# (Kan_LB) is read from the file rather than computed here, and the `rep`
# column marks negative-control rows as "NC".
df_cond <-
  "../../data/low_throughput_experiments/2021_07_01_tol_ara_params.csv" %>%
  read_csv()
df_cond %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

| cond_name | cond_id | pre_tol | pre_ara | post_tol | post_ara | rep | LB | Kan | Kan_LB |
|---|---|---|---|---|---|---|---|---|---|
| uninduced - - | 1 | FALSE | FALSE | FALSE | FALSE | 1 | 3000000 | 22 | 0.0000073 |
| uninduced - - | 1 | FALSE | FALSE | FALSE | FALSE | 2 | 3900000 | 13 | 0.0000033 |
| uninduced - - | 1 | FALSE | FALSE | FALSE | FALSE | 3 | 5000000 | 5 | 0.0000010 |
| uninduced - - | 1 | FALSE | FALSE | FALSE | FALSE | NC | 3800000 | 8 | 0.0000021 |
| uninduced - + | 2 | FALSE | FALSE | FALSE | TRUE | 1 | 3000000 | 20 | 0.0000067 |
| uninduced - + | 2 | FALSE | FALSE | FALSE | TRUE | 2 | 2600000 | 10 | 0.0000038 |
| uninduced - + | 2 | FALSE | FALSE | FALSE | TRUE | 3 | 2000000 | 16 | 0.0000080 |
| uninduced - + | 2 | FALSE | FALSE | FALSE | TRUE | NC | 2400000 | 15 | 0.0000063 |
| uninduced + + | 3 | FALSE | FALSE | TRUE | TRUE | 1 | 330000 | 4 | 0.0000121 |
| uninduced + + | 3 | FALSE | FALSE | TRUE | TRUE | 2 | 420000 | 5 | 0.0000119 |
| uninduced + + | 3 | FALSE | FALSE | TRUE | TRUE | 3 | 320000 | 6 | 0.0000187 |
| uninduced + + | 3 | FALSE | FALSE | TRUE | TRUE | NC | 250000 | 8 | 0.0000320 |
| tol induced - - | 4 | TRUE | FALSE | FALSE | FALSE | 1 | 3400000 | 3200 | 0.0009412 |
| tol induced - - | 4 | TRUE | FALSE | FALSE | FALSE | 2 | 3000000 | 2800 | 0.0009333 |
| tol induced - - | 4 | TRUE | FALSE | FALSE | FALSE | 3 | 1700000 | 1600 | 0.0009412 |
| tol induced - - | 4 | TRUE | FALSE | FALSE | FALSE | NC | 2100000 | 9 | 0.0000043 |
| tol induced - + | 5 | TRUE | FALSE | FALSE | TRUE | 1 | 1760000 | 8300 | 0.0047159 |
| tol induced - + | 5 | TRUE | FALSE | FALSE | TRUE | 2 | 2400000 | 7100 | 0.0029583 |
| tol induced - + | 5 | TRUE | FALSE | FALSE | TRUE | 3 | 2600000 | 5000 | 0.0019231 |
| tol induced - + | 5 | TRUE | FALSE | FALSE | TRUE | NC | 3400000 | 8 | 0.0000024 |
| tol induced + + | 6 | TRUE | FALSE | TRUE | TRUE | 1 | 210000 | 63 | 0.0003000 |
| tol induced + + | 6 | TRUE | FALSE | TRUE | TRUE | 2 | 110000 | 45 | 0.0004091 |
| tol induced + + | 6 | TRUE | FALSE | TRUE | TRUE | 3 | 260000 | 90 | 0.0003462 |
| tol induced + + | 6 | TRUE | FALSE | TRUE | TRUE | NC | 180000 | 9 | 0.0000500 |
| tol + ara induced - - | 7 | TRUE | TRUE | FALSE | FALSE | 1 | 2600000 | 2400 | 0.0009231 |
| tol + ara induced - - | 7 | TRUE | TRUE | FALSE | FALSE | 2 | 3100000 | 5600 | 0.0018065 |
| tol + ara induced - - | 7 | TRUE | TRUE | FALSE | FALSE | 3 | 2900000 | 3900 | 0.0013448 |
| tol + ara induced - - | 7 | TRUE | TRUE | FALSE | FALSE | NC | 1100000 | 33 | 0.0000300 |
| tol + ara induced - + | 8 | TRUE | TRUE | FALSE | TRUE | 1 | 890000 | 6400 | 0.0071910 |
| tol + ara induced - + | 8 | TRUE | TRUE | FALSE | TRUE | 2 | 1900000 | 10200 | 0.0053684 |
| tol + ara induced - + | 8 | TRUE | TRUE | FALSE | TRUE | 3 | 2000000 | 6600 | 0.0033000 |
| tol + ara induced - + | 8 | TRUE | TRUE | FALSE | TRUE | NC | 2400000 | 56 | 0.0000233 |
| tol + ara induced + + | 9 | TRUE | TRUE | TRUE | TRUE | 1 | 150000 | 150 | 0.0010000 |
| tol + ara induced + + | 9 | TRUE | TRUE | TRUE | TRUE | 2 | 140000 | 160 | 0.0011429 |
| tol + ara induced + + | 9 | TRUE | TRUE | TRUE | TRUE | 3 | 190000 | 90 | 0.0004737 |
| tol + ara induced + + | 9 | TRUE | TRUE | TRUE | TRUE | NC | 40000 | 6 | 0.0001500 |
Now let’s plot the data. Note that the labeling of each condition gets complicated, so we made a special legend with + / - for each possible induction.
# Plot individual replicates, mean points / crossbars, and negative control Xs.
# `rep` mixes replicate numbers with "NC", so it is compared as a string.
plot_cond <- ggplot(
  df_cond %>% filter(rep != "NC"),
  aes(x = factor(cond_id), y = Kan_LB)
) +
  # Negative controls: light gray Xs, added first so they sit behind the data
  geom_point(data = df_cond %>% filter(rep == "NC"), shape = 4, color = "light gray") +
  geom_jitter(width = 0.1, height = 0, shape = 21, alpha = 0.4, color = "#440154FF") +
  # `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0
  stat_summary(
    fun = "mean", geom = "crossbar",
    width = 0.5, linewidth = 0.25, color = "#440154FF"
  ) +
  # No `width` here: it is not a point-geom parameter and was ignored with a warning
  stat_summary(fun = "mean", geom = "point", color = "#440154FF") +
  scale_y_log10(
    labels = scales::label_percent(accuracy = 0.0001),
    breaks = c(0.000001, 0.0001, 0.01)
  ) +
  # Axis labels are blanked; the +/- panel built below identifies each condition
  scale_x_discrete(labels = NULL) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = NULL, y = "Efficiency")

# Companion panel: a grid of +/- symbols showing which inducers each condition
# received, with one column per cond_id so it lines up under plot_cond.
plot_cond_labs <- df_cond %>%
  # One row per condition is enough; the induction flags repeat across replicates
  filter(rep == "1") %>%
  select(cond_id, pre_tol, pre_ara, post_tol, post_ara) %>%
  pivot_longer(
    cols = c("pre_tol", "pre_ara", "post_tol", "post_ara"),
    names_to = "inducer"
  ) %>%
  # The flags are TRUE/FALSE, so test them directly instead of `== T`
  mutate(value = ifelse(value, "+", "-")) %>%
  ggplot(aes(x = factor(cond_id), y = inducer, label = value)) +
  geom_text() +
  scale_y_discrete(
    limits = c("post_ara", "pre_ara", "post_tol", "pre_tol"),
    labels = c("post ara", "pre ara", "post tol", "pre tol")
  ) +
  labs(x = NULL, y = NULL) +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank()
  )

# Stack the data panel over the +/- legend, aligned on the left/right axes
plot_cond_legend <- plot_grid(
  plot_cond, plot_cond_labs,
  ncol = 1, rel_heights = c(4, 1), rel_widths = c(1, 1),
  align = "hv", axis = "lr", scale = 1.0
)
plot_cond_legend

This experiment tested the effect of targeting oligo length. Identical oligos were used for the four different loci, but their homology arms varied in length. The total length of the oligo is reported here, which includes both homology arms and the 38 bp attB site. Let’s read in the data.
# Load the oligo-length colony counts and derive efficiency as the ratio of
# Kan counts to LB counts.
df_len <-
  "../../data/low_throughput_experiments/2022_02_15_orbit_TO_len_data.csv" %>%
  read_csv() %>%
  mutate(eff = Kan / LB)
df_len %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

| targeting_oligo | gene | TO_len | replicate | LB | Kan | eff |
|---|---|---|---|---|---|---|
| 269 | galK | 74 | 1 | 530000 | 190 | 0.0003585 |
| 269 | galK | 74 | 2 | 860000 | 150 | 0.0001744 |
| 269 | galK | 74 | 3 | 1120000 | 220 | 0.0001964 |
| 264 | galK | 90 | 1 | 970000 | 28000 | 0.0288660 |
| 264 | galK | 90 | 2 | 400000 | 7900 | 0.0197500 |
| 264 | galK | 90 | 3 | 930000 | 9200 | 0.0098925 |
| 270 | galK | 104 | 1 | 920000 | 25000 | 0.0271739 |
| 270 | galK | 104 | 2 | 940000 | 22000 | 0.0234043 |
| 270 | galK | 104 | 3 | 1090000 | 15000 | 0.0137615 |
| 265 | galK | 120 | 1 | 1140000 | 24000 | 0.0210526 |
| 265 | galK | 120 | 2 | 910000 | 23000 | 0.0252747 |
| 265 | galK | 120 | 3 | 950000 | 18000 | 0.0189474 |
| 271 | hisA | 74 | 1 | 810000 | 230 | 0.0002840 |
| 271 | hisA | 74 | 2 | 890000 | 150 | 0.0001685 |
| 271 | hisA | 74 | 3 | 1010000 | 210 | 0.0002079 |
| 272 | hisA | 90 | 1 | 640000 | 1800 | 0.0028125 |
| 272 | hisA | 90 | 2 | 500000 | 2400 | 0.0048000 |
| 272 | hisA | 90 | 3 | 820000 | 3000 | 0.0036585 |
| 273 | hisA | 104 | 1 | 750000 | 17000 | 0.0226667 |
| 273 | hisA | 104 | 2 | 1130000 | 24000 | 0.0212389 |
| 273 | hisA | 104 | 3 | 770000 | 20000 | 0.0259740 |
| 266 | hisA | 120 | 1 | 1900000 | 27000 | 0.0142105 |
| 266 | hisA | 120 | 2 | 960000 | 31000 | 0.0322917 |
| 266 | hisA | 120 | 3 | 810000 | 40000 | 0.0493827 |
| 274 | metA | 74 | 1 | 820000 | 80 | 0.0000976 |
| 274 | metA | 74 | 2 | 740000 | 230 | 0.0003108 |
| 274 | metA | 74 | 3 | 740000 | 19 | 0.0000257 |
| 275 | metA | 90 | 1 | 710000 | 800 | 0.0011268 |
| 275 | metA | 90 | 2 | 870000 | 850 | 0.0009770 |
| 275 | metA | 90 | 3 | 790000 | 580 | 0.0007342 |
| 276 | metA | 104 | 1 | 600000 | 1200 | 0.0020000 |
| 276 | metA | 104 | 2 | 790000 | 2500 | 0.0031646 |
| 276 | metA | 104 | 3 | 850000 | 3400 | 0.0040000 |
| 267 | metA | 120 | 1 | 840000 | 10700 | 0.0127381 |
| 267 | metA | 120 | 2 | 730000 | 12800 | 0.0175342 |
| 267 | metA | 120 | 3 | 790000 | 10200 | 0.0129114 |
| 277 | leuD | 74 | 1 | 1300000 | 76 | 0.0000585 |
| 277 | leuD | 74 | 2 | 950000 | 78 | 0.0000821 |
| 277 | leuD | 74 | 3 | 760000 | 64 | 0.0000842 |
| 278 | leuD | 90 | 1 | 810000 | 1300 | 0.0016049 |
| 278 | leuD | 90 | 2 | 940000 | 1260 | 0.0013404 |
| 278 | leuD | 90 | 3 | 750000 | 1270 | 0.0016933 |
| 279 | leuD | 104 | 1 | 740000 | 1700 | 0.0022973 |
| 279 | leuD | 104 | 2 | 720000 | 2200 | 0.0030556 |
| 279 | leuD | 104 | 3 | 1030000 | 1800 | 0.0017476 |
| 268 | leuD | 120 | 1 | 670000 | 2800 | 0.0041791 |
| 268 | leuD | 120 | 2 | 570000 | 1770 | 0.0031053 |
| 268 | leuD | 120 | 3 | 710000 | 1600 | 0.0022535 |
| pInt_only | NA | NA | 1 | 1090000 | 300 | 0.0002752 |
| pInt_only | NA | NA | 2 | 1190000 | 250 | 0.0002101 |
| pInt_only | NA | NA | 3 | 1090000 | 130 | 0.0001193 |
Let’s plot the data.
# Calculate condition means and standard deviations for the targeting-oligo
# rows (the pInt_only control has no gene / TO_len).
df_len_summary <- df_len %>%
  filter(targeting_oligo != "pInt_only") %>%
  group_by(gene, TO_len) %>%
  # Drop the grouping so the summary is a plain tibble for the plot layers
  summarise(mean = mean(eff), sd = sd(eff), .groups = "drop")

# Get negative control value (mean efficiency of the pInt_only replicates)
df_len_control <- df_len %>% filter(targeting_oligo == "pInt_only")
to_len_pInt <- mean(df_len_control$eff)

# Plot individual replicates, mean points, and connecting lines.
# Control rows are filtered out explicitly: their TO_len is NA, so they would
# otherwise reach the layers only to be dropped as missing values. The control
# is shown as the dashed reference line instead.
plot_to_len <- ggplot(
  df_len %>% filter(targeting_oligo != "pInt_only"),
  aes(x = TO_len, y = eff, color = gene)
) +
  geom_hline(yintercept = to_len_pInt, color = "gray", linetype = "dashed") +
  geom_jitter(shape = 21, alpha = 0.4, width = 1, height = 0) +
  geom_point(
    data = df_len_summary,
    aes(y = mean),
    position = position_jitter(height = 0, width = 0.1)
  ) +
  # Disabled alternative: wider jitter for the metA 74 nt mean than for the rest
  # geom_point(data = df_len_summary %>% filter(!(gene=='metA' & TO_len == 74)),
  # aes(y = mean),
  # position = position_jitter(height = 0, width = 0.1))+
  # geom_point(data = df_len_summary %>% filter(gene == 'metA' & TO_len == 74),
  # aes(y = mean),
  # position = position_jitter(height = 0, width = 0.5))+
  geom_line(data = df_len_summary, aes(y = mean)) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.01)) +
  scale_x_continuous(breaks = c(74, 90, 104, 120)) +
  # Explicit limits fix the gene order (and therefore the color assignment)
  scale_colour_viridis_d(limits = c("galK", "hisA", "metA", "leuD")) +
  scale_fill_viridis_d(limits = c("galK", "hisA", "metA", "leuD")) +
  labs(x = "Targeting oligo length (nt)", y = "Efficiency", color = NULL)
plot_to_len

This experiment tested targeting oligos binding the leading or lagging strand at each locus. 120 nt TOs were used. Let’s read in the data:
# Read the leading/lagging strand colony counts and compute efficiencies.
df_lag <- read_csv("../../data/low_throughput_experiments/2022_03_07_leading_lagging_data.csv") %>%
  mutate(eff = Kan_count / LB_count) %>%
  # Average efficiency across the replicates of each locus / strand pair
  group_by(locus, strand) %>%
  mutate(avg_eff = mean(eff)) %>%
  # Release the grouping before recoding `locus` and `strand` (the grouping
  # columns) so df_lag is left as a plain, ungrouped tibble
  ungroup() %>%
  # Fix the display order of loci and strands for plotting
  mutate(
    locus = factor(locus, levels = c("galK", "hisA", "metA", "leuD", "pInt only")),
    strand = factor(strand, levels = c("leading", "lagging", "none"))
  )
df_lag %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

| locus | strand | replicate | LB_count | Kan_count | eff | avg_eff |
|---|---|---|---|---|---|---|
| galK | lagging | 1 | 23000000 | 130000 | 0.0056522 | 0.0061019 |
| galK | lagging | 2 | 29000000 | 190000 | 0.0065517 | 0.0061019 |
| galK | leading | 1 | 27000000 | 310 | 0.0000115 | 0.0000448 |
| galK | leading | 2 | 7300000 | 570 | 0.0000781 | 0.0000448 |
| hisA | lagging | 1 | 30000000 | 460000 | 0.0153333 | 0.0147037 |
| hisA | lagging | 2 | 27000000 | 380000 | 0.0140741 | 0.0147037 |
| hisA | leading | 1 | 27000000 | 670 | 0.0000248 | 0.0000322 |
| hisA | leading | 2 | 22000000 | 870 | 0.0000395 | 0.0000322 |
| metA | lagging | 1 | 1140000 | 13500 | 0.0118421 | 0.0093622 |
| metA | lagging | 2 | 1700000 | 11700 | 0.0068824 | 0.0093622 |
| metA | leading | 1 | 1700000 | 1260 | 0.0007412 | 0.0009649 |
| metA | leading | 2 | 1220000 | 1450 | 0.0011885 | 0.0009649 |
| leuD | lagging | 1 | 970000 | 3400 | 0.0035052 | 0.0036507 |
| leuD | lagging | 2 | 1080000 | 4100 | 0.0037963 | 0.0036507 |
| leuD | leading | 1 | 1040000 | 330 | 0.0003173 | 0.0002857 |
| leuD | leading | 2 | 1220000 | 310 | 0.0002541 | 0.0002857 |
| pInt only | none | 1 | 1300000 | 153 | 0.0001177 | 0.0000808 |
| pInt only | none | 2 | 2500000 | 110 | 0.0000440 | 0.0000808 |
Now let’s plot:
# Get negative control value: the average efficiency shared by the pInt-only
# rows (strand == "none")
lag_pInt <- (df_lag %>% filter(strand == "none"))$avg_eff[1]

# Plot individual replicates, mean points and crossbars, with the negative
# control as a dashed reference line; one facet per locus.
plot_lag <- ggplot(
  df_lag %>% filter(strand != "none"),
  aes(x = strand, y = eff, color = locus)
) +
  geom_hline(yintercept = lag_pInt, color = "light gray", linetype = 2) +
  geom_point(position = position_dodge(width = 1), alpha = 0.4, fill = NA, shape = 21) +
  # `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0
  stat_summary(
    fun = "mean", geom = "crossbar",
    position = position_dodge(width = 1), width = 0.5, linewidth = 0.25
  ) +
  # No `width` here: it is not a point-geom parameter and was ignored with a warning
  stat_summary(fun = "mean", geom = "point", position = position_dodge(width = 1)) +
  facet_grid(~locus) +
  scale_color_viridis_d() +
  scale_y_log10(
    labels = scales::label_percent(accuracy = 0.001),
    breaks = c(0.0001, 0.001, 0.01)
  ) +
  # Labels are positional and follow the strand level order: leading, lagging
  scale_x_discrete(labels = c("lead", "lag")) +
  labs(y = "Efficiency", x = "Targeting oligo strand", color = NULL) +
  # The facet strips already name each locus, so the color legend is dropped
  guides(color = "none") +
  theme(panel.border = element_rect(color = "black", fill = NA))
plot_lag

This experiment tested the effect of TO concentration (final in 50 µL cell aliquots). Standard 120 nt TOs were used for each locus. Let’s read in the data:
# Read the targeting-oligo concentration colony counts and compute efficiencies.
df_to_conc <- read_csv("../../data/low_throughput_experiments/2022_09_28_TO_conc_4_loci_data.csv") %>%
  mutate(eff = Kan_count / LB_count) %>%
  # Average efficiency across the replicates of each condition, skipping NAs
  group_by(TO_conc, locus, condition) %>%
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  # Release the grouping before recoding `locus` (a grouping column) so
  # df_to_conc is left as a plain, ungrouped tibble
  ungroup() %>%
  # Fix the locus order for plotting; rows whose locus is not listed become NA
  mutate(locus = factor(locus, levels = c("galK", "hisA", "metA", "leuD")))
df_to_conc %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

| condition | TO_conc | locus | replicate | LB_count | Kan_count | eff | avg_eff |
|---|---|---|---|---|---|---|---|
| 4uM galK | 4000 | galK | 1 | 1300000 | 570 | 0.0004385 | 0.0003604 |
| 4uM galK | 4000 | galK | 2 | 1700000 | 480 | 0.0002824 | 0.0003604 |
| 1uM galK | 1000 | galK | 1 | 1700000 | 820 | 0.0004824 | 0.0004252 |
| 1uM galK | 1000 | galK | 2 | 2500000 | 920 | 0.0003680 | 0.0004252 |
| 100nM galK | 100 | galK | 1 | 2400000 | 900 | 0.0003750 | 0.0005486 |
| 100nM galK | 100 | galK | 2 | 1800000 | 1300 | 0.0007222 | 0.0005486 |
| 10nM galK | 10 | galK | 1 | 1100000 | 560 | 0.0005091 | 0.0002962 |
| 10nM galK | 10 | galK | 2 | 2400000 | 200 | 0.0000833 | 0.0002962 |
| 4uM hisA | 4000 | hisA | 1 | 1700000 | 1230 | 0.0007235 | 0.0008618 |
| 4uM hisA | 4000 | hisA | 2 | 1000000 | 1000 | 0.0010000 | 0.0008618 |
| 1uM hisA | 1000 | hisA | 1 | 1610000 | 730 | 0.0004534 | 0.0002737 |
| 1uM hisA | 1000 | hisA | 2 | 1160000 | 109 | 0.0000940 | 0.0002737 |
| 100nM hisA | 100 | hisA | 1 | 2400000 | 1600 | 0.0006667 | 0.0011333 |
| 100nM hisA | 100 | hisA | 2 | 2500000 | 4000 | 0.0016000 | 0.0011333 |
| 10nM hisA | 10 | hisA | 1 | 2500000 | 830 | 0.0003320 | 0.0007035 |
| 10nM hisA | 10 | hisA | 2 | 1200000 | 1290 | 0.0010750 | 0.0007035 |
| 4uM metA | 4000 | metA | 1 | 2000000 | 860 | 0.0004300 | 0.0004885 |
| 4uM metA | 4000 | metA | 2 | 1700000 | 930 | 0.0005471 | 0.0004885 |
| 1uM metA | 1000 | metA | 1 | 2400000 | 1800 | 0.0007500 | 0.0005950 |
| 1uM metA | 1000 | metA | 2 | 1500000 | 660 | 0.0004400 | 0.0005950 |
| 100nM metA | 100 | metA | 1 | 2200000 | 1830 | 0.0008318 | 0.0010945 |
| 100nM metA | 100 | metA | 2 | 2800000 | 3800 | 0.0013571 | 0.0010945 |
| 10nM metA | 10 | metA | 1 | 1100000 | 610 | 0.0005545 | 0.0007523 |
| 10nM metA | 10 | metA | 2 | 1000000 | 950 | 0.0009500 | 0.0007523 |
| 4uM leuD | 4000 | leuD | 1 | 1700000 | 330 | 0.0001941 | 0.0002037 |
| 4uM leuD | 4000 | leuD | 2 | 1500000 | 320 | 0.0002133 | 0.0002037 |
| 1uM leuD | 1000 | leuD | 1 | 1600000 | 140 | 0.0000875 | 0.0001009 |
| 1uM leuD | 1000 | leuD | 2 | 1400000 | 160 | 0.0001143 | 0.0001009 |
| 100nM leuD | 100 | leuD | 1 | 1500000 | 480 | 0.0003200 | 0.0002475 |
| 100nM leuD | 100 | leuD | 2 | 1600000 | 280 | 0.0001750 | 0.0002475 |
| 10nM leuD | 10 | leuD | 1 | 1800000 | 80 | 0.0000444 | 0.0000568 |
| 10nM leuD | 10 | leuD | 2 | 2600000 | 180 | 0.0000692 | 0.0000568 |
| pInt only | 0 | NA | 1 | 2900000 | 30 | 0.0000103 | 0.0000172 |
| pInt only | 0 | NA | 2 | 1500000 | 36 | 0.0000240 | 0.0000172 |
Let’s plot the data:
# Reference level: average efficiency of the pInt-only negative control
to_conc_pInt <- pull(filter(df_to_conc, condition == "pInt only"), avg_eff)[1]

# Replicates as open circles, with per-condition means joined by lines for each
# locus. The control (TO_conc of 0) is excluded and drawn as the dashed line.
plot_to_conc <- df_to_conc %>%
  filter(TO_conc > 0) %>%
  ggplot(aes(x = TO_conc, y = avg_eff, color = locus)) +
  geom_hline(yintercept = to_conc_pInt, linetype = 2, color = "light gray") +
  geom_jitter(aes(y = eff), shape = 21, alpha = 0.4, width = 0.1, height = 0) +
  geom_point() +
  geom_line() +
  scale_color_viridis_d() +
  scale_x_log10(
    breaks = c(10, 100, 1000, 4000),
    labels = c("10 nM", "100 nM", "1 µM", "4 µM")
  ) +
  scale_y_continuous(
    labels = scales::label_percent(),
    trans = "log10",
    breaks = c(0.001, 0.0001, 0.00001),
    limits = c(0.00001, NA)
  ) +
  labs(x = "Targeting oligo concentration", y = "Efficiency")
plot_to_conc

This experiment tested the effect of how much integrating plasmid (pInt_attP1_kanR) was added to the ORBIT transformation. Let’s read in the data:
# Read the integrating-plasmid amount colony counts and compute efficiencies.
df_pint_conc <- read_csv("../../data/low_throughput_experiments/2022_03_31_pInt_conc_galK_hisA_data.csv") %>%
  mutate(eff = Kan_count / LB_count) %>%
  # Average efficiency across the replicates of each condition; na.rm skips
  # replicates with a missing Kan count
  group_by(pInt_ng, TO_added, locus) %>%
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  # Leave df_pint_conc as a plain, ungrouped tibble
  ungroup()
df_pint_conc %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

| condition | pInt_ng | TO_added | locus | replicate | LB_count | Kan_count | eff | avg_eff |
|---|---|---|---|---|---|---|---|---|
| 1ng pInt | 1 | FALSE | pInt_only | 1 | 1.02e+08 | 7 | 0.0000001 | 0.0000001 |
| 1ng pInt | 1 | FALSE | pInt_only | 2 | 7.00e+07 | 8 | 0.0000001 | 0.0000001 |
| 10ng pInt | 10 | FALSE | pInt_only | 1 | 6.60e+07 | 20 | 0.0000003 | 0.0000003 |
| 10ng pInt | 10 | FALSE | pInt_only | 2 | 7.90e+07 | 20 | 0.0000003 | 0.0000003 |
| 100ng pInt | 100 | FALSE | pInt_only | 1 | 6.80e+07 | 290 | 0.0000043 | 0.0000033 |
| 100ng pInt | 100 | FALSE | pInt_only | 2 | 7.20e+07 | 170 | 0.0000024 | 0.0000033 |
| 278ng pInt | 278 | FALSE | pInt_only | 1 | 7.20e+07 | 280 | 0.0000039 | 0.0000061 |
| 278ng pInt | 278 | FALSE | pInt_only | 2 | 6.20e+07 | 520 | 0.0000084 | 0.0000061 |
| 1ng pInt + p265 | 1 | TRUE | galK | 1 | 6.90e+07 | 510 | 0.0000074 | 0.0000099 |
| 1ng pInt + p265 | 1 | TRUE | galK | 2 | 6.40e+07 | 800 | 0.0000125 | 0.0000099 |
| 10ng pInt + p265 | 10 | TRUE | galK | 1 | 5.80e+07 | 4300 | 0.0000741 | 0.0001103 |
| 10ng pInt + p265 | 10 | TRUE | galK | 2 | 5.60e+07 | 8200 | 0.0001464 | 0.0001103 |
| 100ng pInt + p265 | 100 | TRUE | galK | 1 | 2.50e+07 | 35000 | 0.0014000 | 0.0016583 |
| 100ng pInt + p265 | 100 | TRUE | galK | 2 | 2.40e+07 | 46000 | 0.0019167 | 0.0016583 |
| 278ng pInt + p265 | 278 | TRUE | galK | 1 | 2.10e+07 | 75000 | 0.0035714 | 0.0028151 |
| 278ng pInt + p265 | 278 | TRUE | galK | 2 | 1.70e+07 | 35000 | 0.0020588 | 0.0028151 |
| 1ng pInt + p266 | 1 | TRUE | hisA | 1 | 3.10e+07 | 940 | 0.0000303 | 0.0000311 |
| 1ng pInt + p266 | 1 | TRUE | hisA | 2 | 2.60e+07 | 830 | 0.0000319 | 0.0000311 |
| 10ng pInt + p266 | 10 | TRUE | hisA | 1 | 7.00e+07 | 22000 | 0.0003143 | 0.0003264 |
| 10ng pInt + p266 | 10 | TRUE | hisA | 2 | 6.50e+07 | 22000 | 0.0003385 | 0.0003264 |
| 100ng pInt + p266 | 100 | TRUE | hisA | 1 | 6.10e+07 | 97000 | 0.0015902 | 0.0015902 |
| 100ng pInt + p266 | 100 | TRUE | hisA | 2 | 5.60e+07 | NA | NA | 0.0015902 |
| 278ng pInt + p266 | 278 | TRUE | hisA | 1 | 5.20e+07 | 180000 | 0.0034615 | 0.0054808 |
| 278ng pInt + p266 | 278 | TRUE | hisA | 2 | 5.60e+07 | 420000 | 0.0075000 | 0.0054808 |
Note that the second hisA 100ng data point is NA because it came back as zero colonies on the plate, which was an obvious error.
# Plot replicate efficiencies (open circles), per-condition means (filled
# points) and a path joining the means of each locus.
plot_pint_conc <- ggplot(df_pint_conc, aes(x = pInt_ng, y = eff, color = locus)) +
  # summarise() with no summary columns keeps one row per condition, so each
  # mean path is drawn once; `linewidth` replaces the line `size` argument
  # deprecated in ggplot2 3.4.0
  geom_path(
    data = df_pint_conc %>%
      group_by(pInt_ng, TO_added, locus, avg_eff) %>%
      summarise(.groups = "drop"),
    aes(x = pInt_ng, y = avg_eff, group = locus),
    linewidth = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # avg_eff repeats across replicates, so draw each mean from replicate 1 only
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  # Values and labels are positional: they follow the sorted locus values
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", "light gray"),
    labels = c("galK", "hisA", "control")
  ) +
  scale_x_log10() +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.0001)) +
  # Title the color legend: color is the mapped aesthetic, and the previous
  # `fill` title was never applied because nothing maps to fill
  labs(x = "Integrating plasmid concentration (ng) ", y = "Efficiency", color = "Locus")
plot_pint_conc

This experiment tested the effect of arabinose (bxb-1 inducer for pHelper-Ec1-gentR) in a 1 hr recovery culture. Let’s read in the data:
# Read the arabinose-level colony counts and compute efficiencies.
df_ara <- read_csv("../../data/low_throughput_experiments/2022_06_21_arabinose_levels_data.csv") %>%
  mutate(eff = Kan_count / LB_count) %>%
  # Average efficiency across the replicates of each arabinose level / locus
  group_by(arabinose_per, locus) %>%
  mutate(avg_eff = mean(eff)) %>%
  # Leave df_ara as a plain, ungrouped tibble
  ungroup()
df_ara %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

| condition | arabinose_per | TO | locus | replicate | LB_count | Kan_count | eff | avg_eff |
|---|---|---|---|---|---|---|---|---|
| 0% arabinose | 0.00 | p265 | galK | 1 | 430000 | 370 | 0.0008605 | 0.0006352 |
| 0% arabinose | 0.00 | p265 | galK | 2 | 610000 | 250 | 0.0004098 | 0.0006352 |
| 0.01 % arabinose | 0.01 | p265 | galK | 1 | 680000 | 550 | 0.0008088 | 0.0008317 |
| 0.01 % arabinose | 0.01 | p265 | galK | 2 | 550000 | 470 | 0.0008545 | 0.0008317 |
| 0.1 % arabinose | 0.10 | p265 | galK | 1 | 440000 | 960 | 0.0021818 | 0.0018556 |
| 0.1 % arabinose | 0.10 | p265 | galK | 2 | 510000 | 780 | 0.0015294 | 0.0018556 |
| 1 % arabinose | 1.00 | p265 | galK | 1 | 550000 | 1490 | 0.0027091 | 0.0022013 |
| 1 % arabinose | 1.00 | p265 | galK | 2 | 620000 | 1050 | 0.0016935 | 0.0022013 |
| 0% arabinose | 0.00 | p266 | hisA | 1 | 440000 | 93 | 0.0002114 | 0.0002514 |
| 0% arabinose | 0.00 | p266 | hisA | 2 | 470000 | 137 | 0.0002915 | 0.0002514 |
| 0.01 % arabinose | 0.01 | p266 | hisA | 1 | 560000 | 390 | 0.0006964 | 0.0012019 |
| 0.01 % arabinose | 0.01 | p266 | hisA | 2 | 410000 | 700 | 0.0017073 | 0.0012019 |
| 0.1 % arabinose | 0.10 | p266 | hisA | 1 | 600000 | 790 | 0.0013167 | 0.0017833 |
| 0.1 % arabinose | 0.10 | p266 | hisA | 2 | 520000 | 1170 | 0.0022500 | 0.0017833 |
| 1 % arabinose | 1.00 | p266 | hisA | 1 | 540000 | 1260 | 0.0023333 | 0.0022768 |
| 1 % arabinose | 1.00 | p266 | hisA | 2 | 590000 | 1310 | 0.0022203 | 0.0022768 |
| 0% arabinose | 0.00 | pInt only | pInt_only | 1 | 500000 | 6 | 0.0000120 | 0.0000122 |
| 0% arabinose | 0.00 | pInt only | pInt_only | 2 | 560000 | 7 | 0.0000125 | 0.0000122 |
| 0.01 % arabinose | 0.01 | pInt only | pInt_only | 1 | 560000 | 7 | 0.0000125 | 0.0000138 |
| 0.01 % arabinose | 0.01 | pInt only | pInt_only | 2 | 530000 | 8 | 0.0000151 | 0.0000138 |
| 0.1 % arabinose | 0.10 | pInt only | pInt_only | 1 | 570000 | 8 | 0.0000140 | 0.0000144 |
| 0.1 % arabinose | 0.10 | pInt only | pInt_only | 2 | 610000 | 9 | 0.0000148 | 0.0000144 |
| 1 % arabinose | 1.00 | pInt only | pInt_only | 1 | 470000 | 20 | 0.0000426 | 0.0000391 |
| 1 % arabinose | 1.00 | pInt only | pInt_only | 2 | 450000 | 16 | 0.0000356 | 0.0000391 |
Now let’s plot:
# Plot replicate efficiencies (open circles), per-condition means (filled
# points) and a path joining the means of each locus. Arabinose level is
# treated as a discrete axis, so the levels are evenly spaced.
plot_ara <- ggplot(df_ara, aes(x = factor(arabinose_per), y = eff, color = locus)) +
  # summarise() with no summary columns keeps one row per condition, so each
  # mean path is drawn once; `linewidth` replaces the line `size` argument
  # deprecated in ggplot2 3.4.0
  geom_path(
    data = df_ara %>%
      group_by(arabinose_per, locus, avg_eff) %>%
      summarise(.groups = "drop"),
    aes(x = factor(arabinose_per), y = avg_eff, group = locus),
    linewidth = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # avg_eff repeats across replicates, so draw each mean from replicate 1 only
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  # Values and labels are positional: they follow the sorted locus values.
  # The previous fill scale was removed because no layer maps the fill aesthetic.
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", "light gray"),
    labels = c("galK", "hisA", "control")
  ) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.001)) +
  # Title the color legend: color is the mapped aesthetic, and the previous
  # `fill` title was never applied
  labs(x = "Arabinose %", y = "Efficiency", color = "Locus")
plot_ara

This experiment tested the effect of recovering in arabinose following the ORBIT transformation for different periods of time.
# Read the recovery-time colony counts and compute efficiencies.
df_timing <- read_csv("../../data/low_throughput_experiments/2022_07_13_recovery_time_data.csv") %>%
  mutate(eff = Kan_count / LB_count) %>%
  # Average efficiency across the replicates of each recovery time / locus
  group_by(rec_time, locus) %>%
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  # Leave df_timing as a plain, ungrouped tibble
  ungroup()
df_timing %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

| condition | rec_time | locus | replicate | LB_count | Kan_count | eff | avg_eff |
|---|---|---|---|---|---|---|---|
| p265 0hr | 0.0 | galK | 1 | 58000 | 0 | 0.0000000 | 0.0000000 |
| p265 0hr | 0.0 | galK | 2 | 62000 | 0 | 0.0000000 | 0.0000000 |
| p265 30min | 0.5 | galK | 1 | 85000 | 46 | 0.0005412 | 0.0008004 |
| p265 30min | 0.5 | galK | 2 | 84000 | 89 | 0.0010595 | 0.0008004 |
| p265 1hr | 1.0 | galK | 1 | 69000 | 270 | 0.0039130 | 0.0043069 |
| p265 1hr | 1.0 | galK | 2 | 117000 | 550 | 0.0047009 | 0.0043069 |
| p265 3hr | 3.0 | galK | 1 | 3700000 | 1700 | 0.0004595 | 0.0005538 |
| p265 3hr | 3.0 | galK | 2 | 5400000 | 3500 | 0.0006481 | 0.0005538 |
| p265 6hr | 6.0 | galK | 1 | 6800000 | 2600 | 0.0003824 | 0.0004700 |
| p265 6hr | 6.0 | galK | 2 | 5200000 | 2900 | 0.0005577 | 0.0004700 |
| p266 0hr | 0.0 | hisA | 1 | 64000 | 0 | 0.0000000 | 0.0000000 |
| p266 0hr | 0.0 | hisA | 2 | 61000 | 0 | 0.0000000 | 0.0000000 |
| p266 30min | 0.5 | hisA | 1 | 320000 | 40 | 0.0001250 | 0.0001654 |
| p266 30min | 0.5 | hisA | 2 | 340000 | 70 | 0.0002059 | 0.0001654 |
| p266 1hr | 1.0 | hisA | 1 | 360000 | 590 | 0.0016389 | 0.0012694 |
| p266 1hr | 1.0 | hisA | 2 | 400000 | 360 | 0.0009000 | 0.0012694 |
| p266 3hr | 3.0 | hisA | 1 | 3200000 | 4600 | 0.0014375 | 0.0010187 |
| p266 3hr | 3.0 | hisA | 2 | 6000000 | 3600 | 0.0006000 | 0.0010187 |
| p266 6hr | 6.0 | hisA | 1 | 5500000 | 3700 | 0.0006727 | 0.0006248 |
| p266 6hr | 6.0 | hisA | 2 | 5200000 | 3000 | 0.0005769 | 0.0006248 |
| pInt 0hr | 0.0 | pInt_only | 1 | 56000 | 0 | 0.0000000 | 0.0000000 |
| pInt 0hr | 0.0 | pInt_only | 2 | 47000 | 0 | 0.0000000 | 0.0000000 |
| pInt 30min | 0.5 | pInt_only | 1 | 310000 | 1 | 0.0000032 | 0.0000035 |
| pInt 30min | 0.5 | pInt_only | 2 | 260000 | 1 | 0.0000038 | 0.0000035 |
| pInt 1hr | 1.0 | pInt_only | 1 | 370000 | 8 | 0.0000216 | 0.0000150 |
| pInt 1hr | 1.0 | pInt_only | 2 | 360000 | 3 | 0.0000083 | 0.0000150 |
| pInt 3hr | 3.0 | pInt_only | 1 | 4900000 | 21 | 0.0000043 | 0.0000048 |
| pInt 3hr | 3.0 | pInt_only | 2 | 4000000 | 21 | 0.0000052 | 0.0000048 |
| pInt 6hr | 6.0 | pInt_only | 1 | 6000000 | 10 | 0.0000017 | 0.0000018 |
| pInt 6hr | 6.0 | pInt_only | 2 | 4800000 | 9 | 0.0000019 | 0.0000018 |
As the table shows, no colonies were recovered at 0 hr in any of the conditions. These points are not visible on the plot because an efficiency of zero has no finite position on a log scale, but they were indeed measured. Let’s plot:
# Plot replicate efficiencies (open circles), per-condition means (filled
# points) and a path joining the means of each locus. The 0 hr efficiencies
# are zero and therefore do not appear on the log-scaled y axis.
plot_timing <- ggplot(df_timing, aes(x = factor(rec_time), y = eff, color = locus)) +
  # summarise() with no summary columns keeps one row per condition, so each
  # mean path is drawn once; `linewidth` replaces the line `size` argument
  # deprecated in ggplot2 3.4.0
  geom_path(
    data = df_timing %>%
      group_by(rec_time, locus, avg_eff) %>%
      summarise(.groups = "drop"),
    aes(x = factor(rec_time), y = avg_eff, group = locus),
    linewidth = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # avg_eff repeats across replicates, so draw each mean from replicate 1 only
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  # Values and labels are positional: they follow the sorted locus values
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", "light gray"),
    labels = c("galK", "hisA", "control")
  ) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.001)) +
  # Positional labels for the sorted rec_time levels (0, 0.5, 1, 3, 6 hours)
  scale_x_discrete(labels = c("0 min", "30 min", "1 hr", "3 hr", "6 hr")) +
  # Title the color legend: color is the mapped aesthetic, and the previous
  # `fill` title was never applied
  labs(x = "Recovery time", y = "Efficiency", color = "Locus")
plot_timing

# Switch the default theme to the one used for the assembled figure
theme_set(theme_figure())

# Rebuild panel B after the theme change, with a shorter data panel (3:1) and
# a 0.9 scale. Presumably this is so the panel is assembled under
# theme_figure() rather than theme_notebook() -- TODO confirm.
plot_cond_legend <- plot_grid(
  plot_cond, plot_cond_labs,
  ncol = 1, rel_heights = c(3, 1), rel_widths = c(1, 1),
  align = "hv", axis = "lr", scale = 0.9
)

# Panels C-H: six plots in a two-column (three-row) grid, legends suppressed
fig_2_bottom <- plot_grid(
  plot_to_len + guides(color = "none", fill = "none"),
  plot_lag + guides(shape = "none"),
  plot_to_conc + guides(color = "none", fill = "none"),
  plot_pint_conc + guides(color = "none"),
  plot_ara + guides(color = "none"),
  plot_timing + guides(color = "none"),
  ncol = 2,
  # One relative height per row: six panels in two columns make three rows
  rel_heights = c(1, 1, 1), rel_widths = c(1, 1),
  align = "hv", axis = "lr", scale = 0.9,
  labels = c("C", "D", "E", "F", "G", "H")
)

# Panel B on top of panels C-H. Panel A is the diagram made in Adobe
# Illustrator, so it is not part of this grid.
fig_2 <- plot_grid(
  plot_cond_legend, fig_2_bottom,
  ncol = 1, rel_heights = c(1, 3), scale = 1.0, labels = c("B")
)
fig_2

# Export the assembled figure as a 7 x 7 PDF
save_plot("../../figures/r_pdf_figs/main_figs/fig_2_optimization.pdf", fig_2, base_width = 7, base_height = 7)

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.3.4 cowplot_1.1.1 viridis_0.6.2 viridisLite_0.4.1
## [5] knitr_1.41 forcats_0.5.2 stringr_1.5.0 dplyr_1.1.0
## [9] purrr_0.3.5 readr_2.1.3 tidyr_1.2.1 tibble_3.1.8
## [13] ggplot2_3.4.0 tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.4 sass_0.4.4 bit64_4.0.5
## [4] vroom_1.6.0 jsonlite_1.8.3 modelr_0.1.10
## [7] bslib_0.4.1 assertthat_0.2.1 highr_0.9
## [10] googlesheets4_1.0.1 cellranger_1.1.0 yaml_2.3.6
## [13] pillar_1.8.1 backports_1.4.1 glue_1.6.2
## [16] digest_0.6.30 rvest_1.0.3 colorspace_2.0-3
## [19] htmltools_0.5.4 pkgconfig_2.0.3 broom_1.0.1
## [22] haven_2.5.1 scales_1.2.1 webshot_0.5.4
## [25] svglite_2.1.0 tzdb_0.3.0 timechange_0.1.1
## [28] googledrive_2.0.0 generics_0.1.3 farver_2.1.1
## [31] ellipsis_0.3.2 cachem_1.0.6 withr_2.5.0
## [34] cli_3.4.1 magrittr_2.0.3 crayon_1.5.2
## [37] readxl_1.4.1 evaluate_0.18 fs_1.5.2
## [40] fansi_1.0.3 xml2_1.3.3 textshaping_0.3.6
## [43] tools_4.2.0 hms_1.1.2 gargle_1.2.1
## [46] lifecycle_1.0.3 munsell_0.5.0 reprex_2.0.2
## [49] compiler_4.2.0 jquerylib_0.1.4 systemfonts_1.0.4
## [52] rlang_1.0.6 grid_4.2.0 rstudioapi_0.14
## [55] labeling_0.4.2 rmarkdown_2.18 gtable_0.3.1
## [58] DBI_1.1.3 R6_2.5.1 gridExtra_2.3
## [61] lubridate_1.9.0 fastmap_1.1.0 bit_4.0.5
## [64] utf8_1.2.2 ragg_1.2.5 stringi_1.7.8
## [67] parallel_4.2.0 vctrs_0.5.2 dbplyr_2.2.1
## [70] tidyselect_1.2.0 xfun_0.35