This figure contains data from many different experiments that were used to optimize the protocol for ORBIT. Data figures were made in R notebooks and exported as PDFs. Cosmetic improvements were made in Adobe Illustrator. Note that Figure 2A is a diagram that was made in Adobe Illustrator.
Setup packages and plotting for the notebook:
# Check packages
source("../tools/package_setup.R")

# Load packages
library(tidyverse)
library(cowplot)
library(kableExtra)

# Code display options for the knitted notebook: echo the code, hide
# messages and warnings, and center the figures.
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 60),
  tidy = FALSE,
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  fig.align = "center",
  fig.retina = 2
)

# Load plotting tools
# NOTE(review): theme_notebook() / theme_figure() are presumably defined in
# this script -- confirm.
source("../tools/plotting_tools.R")

# Modify the plot theme
theme_set(theme_notebook())
This experiment used a ∆galK targeting oligo with a pInt_kanR integrating plasmid and tested different helper plasmid induction schemes. Let’s first read in the data.
# Read the data for the helper-plasmid induction experiment.
df_cond <- read_csv(
  "../../data/low_throughput_experiments/2021_07_01_tol_ara_params.csv"
)

# Display the raw data as a scrollable table.
df_cond %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "250px")
cond_name | cond_id | pre_tol | pre_ara | post_tol | post_ara | rep | LB | Kan | Kan_LB |
---|---|---|---|---|---|---|---|---|---|
uninduced - - | 1 | FALSE | FALSE | FALSE | FALSE | 1 | 3000000 | 22 | 0.0000073 |
uninduced - - | 1 | FALSE | FALSE | FALSE | FALSE | 2 | 3900000 | 13 | 0.0000033 |
uninduced - - | 1 | FALSE | FALSE | FALSE | FALSE | 3 | 5000000 | 5 | 0.0000010 |
uninduced - - | 1 | FALSE | FALSE | FALSE | FALSE | NC | 3800000 | 8 | 0.0000021 |
uninduced - + | 2 | FALSE | FALSE | FALSE | TRUE | 1 | 3000000 | 20 | 0.0000067 |
uninduced - + | 2 | FALSE | FALSE | FALSE | TRUE | 2 | 2600000 | 10 | 0.0000038 |
uninduced - + | 2 | FALSE | FALSE | FALSE | TRUE | 3 | 2000000 | 16 | 0.0000080 |
uninduced - + | 2 | FALSE | FALSE | FALSE | TRUE | NC | 2400000 | 15 | 0.0000063 |
uninduced + + | 3 | FALSE | FALSE | TRUE | TRUE | 1 | 330000 | 4 | 0.0000121 |
uninduced + + | 3 | FALSE | FALSE | TRUE | TRUE | 2 | 420000 | 5 | 0.0000119 |
uninduced + + | 3 | FALSE | FALSE | TRUE | TRUE | 3 | 320000 | 6 | 0.0000187 |
uninduced + + | 3 | FALSE | FALSE | TRUE | TRUE | NC | 250000 | 8 | 0.0000320 |
tol induced - - | 4 | TRUE | FALSE | FALSE | FALSE | 1 | 3400000 | 3200 | 0.0009412 |
tol induced - - | 4 | TRUE | FALSE | FALSE | FALSE | 2 | 3000000 | 2800 | 0.0009333 |
tol induced - - | 4 | TRUE | FALSE | FALSE | FALSE | 3 | 1700000 | 1600 | 0.0009412 |
tol induced - - | 4 | TRUE | FALSE | FALSE | FALSE | NC | 2100000 | 9 | 0.0000043 |
tol induced - + | 5 | TRUE | FALSE | FALSE | TRUE | 1 | 1760000 | 8300 | 0.0047159 |
tol induced - + | 5 | TRUE | FALSE | FALSE | TRUE | 2 | 2400000 | 7100 | 0.0029583 |
tol induced - + | 5 | TRUE | FALSE | FALSE | TRUE | 3 | 2600000 | 5000 | 0.0019231 |
tol induced - + | 5 | TRUE | FALSE | FALSE | TRUE | NC | 3400000 | 8 | 0.0000024 |
tol induced + + | 6 | TRUE | FALSE | TRUE | TRUE | 1 | 210000 | 63 | 0.0003000 |
tol induced + + | 6 | TRUE | FALSE | TRUE | TRUE | 2 | 110000 | 45 | 0.0004091 |
tol induced + + | 6 | TRUE | FALSE | TRUE | TRUE | 3 | 260000 | 90 | 0.0003462 |
tol induced + + | 6 | TRUE | FALSE | TRUE | TRUE | NC | 180000 | 9 | 0.0000500 |
tol + ara induced - - | 7 | TRUE | TRUE | FALSE | FALSE | 1 | 2600000 | 2400 | 0.0009231 |
tol + ara induced - - | 7 | TRUE | TRUE | FALSE | FALSE | 2 | 3100000 | 5600 | 0.0018065 |
tol + ara induced - - | 7 | TRUE | TRUE | FALSE | FALSE | 3 | 2900000 | 3900 | 0.0013448 |
tol + ara induced - - | 7 | TRUE | TRUE | FALSE | FALSE | NC | 1100000 | 33 | 0.0000300 |
tol + ara induced - + | 8 | TRUE | TRUE | FALSE | TRUE | 1 | 890000 | 6400 | 0.0071910 |
tol + ara induced - + | 8 | TRUE | TRUE | FALSE | TRUE | 2 | 1900000 | 10200 | 0.0053684 |
tol + ara induced - + | 8 | TRUE | TRUE | FALSE | TRUE | 3 | 2000000 | 6600 | 0.0033000 |
tol + ara induced - + | 8 | TRUE | TRUE | FALSE | TRUE | NC | 2400000 | 56 | 0.0000233 |
tol + ara induced + + | 9 | TRUE | TRUE | TRUE | TRUE | 1 | 150000 | 150 | 0.0010000 |
tol + ara induced + + | 9 | TRUE | TRUE | TRUE | TRUE | 2 | 140000 | 160 | 0.0011429 |
tol + ara induced + + | 9 | TRUE | TRUE | TRUE | TRUE | 3 | 190000 | 90 | 0.0004737 |
tol + ara induced + + | 9 | TRUE | TRUE | TRUE | TRUE | NC | 40000 | 6 | 0.0001500 |
Now let’s plot the data. Note that the labeling of each condition gets complicated, so we made a special legend with + / - for each possible induction.
# Plot individual replicates, mean points / crossbars, and negative control Xs.
# Rows with rep == "NC" are the negative controls; they are drawn as gray
# Xs and excluded from the replicate points and the means.
plot_cond <- ggplot(
  df_cond %>% filter(rep != "NC"),
  aes(x = factor(cond_id), y = Kan_LB)
) +
  geom_point(
    data = df_cond %>% filter(rep == "NC"),
    shape = 4, color = "light gray"
  ) +
  geom_jitter(
    width = 0.1, height = 0, shape = 21, alpha = 0.4, color = "#440154FF"
  ) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
  stat_summary(
    fun = "mean", geom = "crossbar",
    width = 0.5, linewidth = 0.25, color = "#440154FF"
  ) +
  # No `width` here: it is not a parameter of the point geom.
  stat_summary(fun = "mean", geom = "point", color = "#440154FF") +
  scale_y_log10(
    labels = scales::label_percent(accuracy = 0.0001),
    breaks = c(0.000001, 0.0001, 0.01)
  ) +
  # The x axis labels are blanked; the +/- grid below labels each condition.
  scale_x_discrete(labels = NULL) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = NULL, y = "Efficiency")

# Build the +/- legend: one row per inducer, one column per condition.
# Only one replicate is needed because the induction flags are identical
# across the replicates of a condition.
plot_cond_labs <- df_cond %>%
  filter(rep == 1) %>%
  select(cond_id, pre_tol, pre_ara, post_tol, post_ara) %>%
  pivot_longer(
    cols = c("pre_tol", "pre_ara", "post_tol", "post_ara"),
    names_to = "inducer"
  ) %>%
  mutate(value = ifelse(value == TRUE, "+", "-")) %>%
  ggplot(aes(x = factor(cond_id), y = inducer, label = value)) +
  geom_text() +
  scale_y_discrete(
    limits = c("post_ara", "pre_ara", "post_tol", "pre_tol"),
    labels = c("post ara", "pre ara", "post tol", "pre tol")
  ) +
  labs(x = NULL, y = NULL) +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank()
  )

# Stack the data plot on top of the +/- legend, aligned on the left/right axes.
plot_cond_legend <- plot_grid(
  plot_cond, plot_cond_labs,
  ncol = 1, rel_heights = c(4, 1), rel_widths = c(1, 1),
  align = "hv", axis = "lr", scale = 1.0
)

plot_cond_legend
This experiment tested the effect of targeting oligo length. Identical oligos were used for the four different loci, but their homology arms varied in length. The total length of the oligo is reported here, which includes both homology arms and the 38 bp attB site. Let’s read in the data.
# Read the targeting-oligo length data and compute the efficiency as
# Kan count / LB count.
df_len <- read_csv(
  "../../data/low_throughput_experiments/2022_02_15_orbit_TO_len_data.csv"
) %>%
  mutate(eff = Kan / LB)

# Display the data as a scrollable table.
df_len %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "250px")
targeting_oligo | gene | TO_len | replicate | LB | Kan | eff |
---|---|---|---|---|---|---|
269 | galK | 74 | 1 | 530000 | 190 | 0.0003585 |
269 | galK | 74 | 2 | 860000 | 150 | 0.0001744 |
269 | galK | 74 | 3 | 1120000 | 220 | 0.0001964 |
264 | galK | 90 | 1 | 970000 | 28000 | 0.0288660 |
264 | galK | 90 | 2 | 400000 | 7900 | 0.0197500 |
264 | galK | 90 | 3 | 930000 | 9200 | 0.0098925 |
270 | galK | 104 | 1 | 920000 | 25000 | 0.0271739 |
270 | galK | 104 | 2 | 940000 | 22000 | 0.0234043 |
270 | galK | 104 | 3 | 1090000 | 15000 | 0.0137615 |
265 | galK | 120 | 1 | 1140000 | 24000 | 0.0210526 |
265 | galK | 120 | 2 | 910000 | 23000 | 0.0252747 |
265 | galK | 120 | 3 | 950000 | 18000 | 0.0189474 |
271 | hisA | 74 | 1 | 810000 | 230 | 0.0002840 |
271 | hisA | 74 | 2 | 890000 | 150 | 0.0001685 |
271 | hisA | 74 | 3 | 1010000 | 210 | 0.0002079 |
272 | hisA | 90 | 1 | 640000 | 1800 | 0.0028125 |
272 | hisA | 90 | 2 | 500000 | 2400 | 0.0048000 |
272 | hisA | 90 | 3 | 820000 | 3000 | 0.0036585 |
273 | hisA | 104 | 1 | 750000 | 17000 | 0.0226667 |
273 | hisA | 104 | 2 | 1130000 | 24000 | 0.0212389 |
273 | hisA | 104 | 3 | 770000 | 20000 | 0.0259740 |
266 | hisA | 120 | 1 | 1900000 | 27000 | 0.0142105 |
266 | hisA | 120 | 2 | 960000 | 31000 | 0.0322917 |
266 | hisA | 120 | 3 | 810000 | 40000 | 0.0493827 |
274 | metA | 74 | 1 | 820000 | 80 | 0.0000976 |
274 | metA | 74 | 2 | 740000 | 230 | 0.0003108 |
274 | metA | 74 | 3 | 740000 | 19 | 0.0000257 |
275 | metA | 90 | 1 | 710000 | 800 | 0.0011268 |
275 | metA | 90 | 2 | 870000 | 850 | 0.0009770 |
275 | metA | 90 | 3 | 790000 | 580 | 0.0007342 |
276 | metA | 104 | 1 | 600000 | 1200 | 0.0020000 |
276 | metA | 104 | 2 | 790000 | 2500 | 0.0031646 |
276 | metA | 104 | 3 | 850000 | 3400 | 0.0040000 |
267 | metA | 120 | 1 | 840000 | 10700 | 0.0127381 |
267 | metA | 120 | 2 | 730000 | 12800 | 0.0175342 |
267 | metA | 120 | 3 | 790000 | 10200 | 0.0129114 |
277 | leuD | 74 | 1 | 1300000 | 76 | 0.0000585 |
277 | leuD | 74 | 2 | 950000 | 78 | 0.0000821 |
277 | leuD | 74 | 3 | 760000 | 64 | 0.0000842 |
278 | leuD | 90 | 1 | 810000 | 1300 | 0.0016049 |
278 | leuD | 90 | 2 | 940000 | 1260 | 0.0013404 |
278 | leuD | 90 | 3 | 750000 | 1270 | 0.0016933 |
279 | leuD | 104 | 1 | 740000 | 1700 | 0.0022973 |
279 | leuD | 104 | 2 | 720000 | 2200 | 0.0030556 |
279 | leuD | 104 | 3 | 1030000 | 1800 | 0.0017476 |
268 | leuD | 120 | 1 | 670000 | 2800 | 0.0041791 |
268 | leuD | 120 | 2 | 570000 | 1770 | 0.0031053 |
268 | leuD | 120 | 3 | 710000 | 1600 | 0.0022535 |
pInt_only | NA | NA | 1 | 1090000 | 300 | 0.0002752 |
pInt_only | NA | NA | 2 | 1190000 | 250 | 0.0002101 |
pInt_only | NA | NA | 3 | 1090000 | 130 | 0.0001193 |
Let’s plot the data.
# Calculate condition means and standard deviations
df_len_summary <- df_len %>%
  filter(targeting_oligo != "pInt_only") %>%
  group_by(gene, TO_len) %>%
  summarise(mean = mean(eff), sd = sd(eff), .groups = "drop")

# Get negative control value (mean efficiency of the pInt_only rows)
df_len_control <- df_len %>%
  filter(targeting_oligo == "pInt_only")
to_len_pInt <- mean(df_len_control$eff)

# Plot individual replicates, mean points, and connecting lines.
# The pInt_only rows have no gene or oligo length (NA), so they are
# filtered out explicitly; the control appears only as the dashed line.
plot_to_len <- ggplot(
  df_len %>% filter(targeting_oligo != "pInt_only"),
  aes(x = TO_len, y = eff, color = gene)
) +
  geom_hline(yintercept = to_len_pInt, color = "gray", linetype = "dashed") +
  geom_jitter(shape = 21, alpha = 0.4, width = 1, height = 0) +
  geom_point(
    data = df_len_summary,
    aes(y = mean),
    position = position_jitter(height = 0, width = 0.1)
  ) +
  geom_line(data = df_len_summary, aes(y = mean)) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.01)) +
  scale_x_continuous(breaks = c(74, 90, 104, 120)) +
  scale_colour_viridis_d(limits = c("galK", "hisA", "metA", "leuD")) +
  scale_fill_viridis_d(limits = c("galK", "hisA", "metA", "leuD")) +
  labs(x = "Targeting oligo length (nt)", y = "Efficiency", color = NULL)

plot_to_len
This experiment tested targeting oligos binding the leading or lagging strand at each locus. 120 nt TOs were used. Let’s read in the data:
# Read the leading / lagging strand data, then calculate the efficiency of
# each replicate and the average efficiency per locus and strand.
df_lag <- read_csv(
  "../../data/low_throughput_experiments/2022_03_07_leading_lagging_data.csv"
) %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(locus, strand) %>%
  mutate(avg_eff = mean(eff)) %>%
  ungroup() %>%
  # Fix the factor order so facets and x positions appear in this order.
  mutate(
    locus = factor(
      locus,
      levels = c("galK", "hisA", "metA", "leuD", "pInt only")
    ),
    strand = factor(strand, levels = c("leading", "lagging", "none"))
  )

# Display the data as a scrollable table.
df_lag %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "250px")
locus | strand | replicate | LB_count | Kan_count | eff | avg_eff |
---|---|---|---|---|---|---|
galK | lagging | 1 | 23000000 | 130000 | 0.0056522 | 0.0061019 |
galK | lagging | 2 | 29000000 | 190000 | 0.0065517 | 0.0061019 |
galK | leading | 1 | 27000000 | 310 | 0.0000115 | 0.0000448 |
galK | leading | 2 | 7300000 | 570 | 0.0000781 | 0.0000448 |
hisA | lagging | 1 | 30000000 | 460000 | 0.0153333 | 0.0147037 |
hisA | lagging | 2 | 27000000 | 380000 | 0.0140741 | 0.0147037 |
hisA | leading | 1 | 27000000 | 670 | 0.0000248 | 0.0000322 |
hisA | leading | 2 | 22000000 | 870 | 0.0000395 | 0.0000322 |
metA | lagging | 1 | 1140000 | 13500 | 0.0118421 | 0.0093622 |
metA | lagging | 2 | 1700000 | 11700 | 0.0068824 | 0.0093622 |
metA | leading | 1 | 1700000 | 1260 | 0.0007412 | 0.0009649 |
metA | leading | 2 | 1220000 | 1450 | 0.0011885 | 0.0009649 |
leuD | lagging | 1 | 970000 | 3400 | 0.0035052 | 0.0036507 |
leuD | lagging | 2 | 1080000 | 4100 | 0.0037963 | 0.0036507 |
leuD | leading | 1 | 1040000 | 330 | 0.0003173 | 0.0002857 |
leuD | leading | 2 | 1220000 | 310 | 0.0002541 | 0.0002857 |
pInt only | none | 1 | 1300000 | 153 | 0.0001177 | 0.0000808 |
pInt only | none | 2 | 2500000 | 110 | 0.0000440 | 0.0000808 |
Now let’s plot:
# Get negative control value: the average efficiency of the rows with
# strand == "none".
lag_pInt <- (df_lag %>% filter(strand == "none"))$avg_eff[1]

# Plot individual replicates, mean points and crossbars and negative
# control values.
plot_lag <- ggplot(
  df_lag %>% filter(strand != "none"),
  aes(x = strand, y = eff, color = locus)
) +
  geom_hline(yintercept = lag_pInt, color = "light gray", linetype = 2) +
  geom_point(
    position = position_dodge(width = 1),
    alpha = 0.4, fill = NA, shape = 21
  ) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
  stat_summary(
    fun = "mean", geom = "crossbar",
    position = position_dodge(width = 1),
    width = 0.5, linewidth = 0.25
  ) +
  # No `width` here: it is not a parameter of the point geom.
  stat_summary(
    fun = "mean", geom = "point",
    position = position_dodge(width = 1)
  ) +
  facet_grid(~locus) +
  scale_color_viridis_d() +
  scale_y_log10(
    labels = scales::label_percent(accuracy = 0.001),
    breaks = c(0.0001, 0.001, 0.01)
  ) +
  # Labels are positional: they follow the strand factor levels
  # (leading, then lagging) set when df_lag was built.
  scale_x_discrete(labels = c("lead", "lag")) +
  labs(y = "Efficiency", x = "Targeting oligo strand", color = NULL) +
  guides(color = "none") +
  theme(panel.border = element_rect(color = "black", fill = NA))

plot_lag
This experiment tested the effect of TO concentration (final in 50 µL cell aliquots). Standard 120 nt TOs were used for each locus. Let’s read in the data:
# Read the targeting-oligo concentration data, then calculate the
# efficiency of each replicate and the average efficiency per condition.
df_to_conc <- read_csv(
  "../../data/low_throughput_experiments/2022_09_28_TO_conc_4_loci_data.csv"
) %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(TO_conc, locus, condition) %>%
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  ungroup() %>%
  # Fix the locus order used for colors and the legend.
  mutate(locus = factor(locus, levels = c("galK", "hisA", "metA", "leuD")))

# Display the data as a scrollable table.
df_to_conc %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "250px")
condition | TO_conc | locus | replicate | LB_count | Kan_count | eff | avg_eff |
---|---|---|---|---|---|---|---|
4uM galK | 4000 | galK | 1 | 1300000 | 570 | 0.0004385 | 0.0003604 |
4uM galK | 4000 | galK | 2 | 1700000 | 480 | 0.0002824 | 0.0003604 |
1uM galK | 1000 | galK | 1 | 1700000 | 820 | 0.0004824 | 0.0004252 |
1uM galK | 1000 | galK | 2 | 2500000 | 920 | 0.0003680 | 0.0004252 |
100nM galK | 100 | galK | 1 | 2400000 | 900 | 0.0003750 | 0.0005486 |
100nM galK | 100 | galK | 2 | 1800000 | 1300 | 0.0007222 | 0.0005486 |
10nM galK | 10 | galK | 1 | 1100000 | 560 | 0.0005091 | 0.0002962 |
10nM galK | 10 | galK | 2 | 2400000 | 200 | 0.0000833 | 0.0002962 |
4uM hisA | 4000 | hisA | 1 | 1700000 | 1230 | 0.0007235 | 0.0008618 |
4uM hisA | 4000 | hisA | 2 | 1000000 | 1000 | 0.0010000 | 0.0008618 |
1uM hisA | 1000 | hisA | 1 | 1610000 | 730 | 0.0004534 | 0.0002737 |
1uM hisA | 1000 | hisA | 2 | 1160000 | 109 | 0.0000940 | 0.0002737 |
100nM hisA | 100 | hisA | 1 | 2400000 | 1600 | 0.0006667 | 0.0011333 |
100nM hisA | 100 | hisA | 2 | 2500000 | 4000 | 0.0016000 | 0.0011333 |
10nM hisA | 10 | hisA | 1 | 2500000 | 830 | 0.0003320 | 0.0007035 |
10nM hisA | 10 | hisA | 2 | 1200000 | 1290 | 0.0010750 | 0.0007035 |
4uM metA | 4000 | metA | 1 | 2000000 | 860 | 0.0004300 | 0.0004885 |
4uM metA | 4000 | metA | 2 | 1700000 | 930 | 0.0005471 | 0.0004885 |
1uM metA | 1000 | metA | 1 | 2400000 | 1800 | 0.0007500 | 0.0005950 |
1uM metA | 1000 | metA | 2 | 1500000 | 660 | 0.0004400 | 0.0005950 |
100nM metA | 100 | metA | 1 | 2200000 | 1830 | 0.0008318 | 0.0010945 |
100nM metA | 100 | metA | 2 | 2800000 | 3800 | 0.0013571 | 0.0010945 |
10nM metA | 10 | metA | 1 | 1100000 | 610 | 0.0005545 | 0.0007523 |
10nM metA | 10 | metA | 2 | 1000000 | 950 | 0.0009500 | 0.0007523 |
4uM leuD | 4000 | leuD | 1 | 1700000 | 330 | 0.0001941 | 0.0002037 |
4uM leuD | 4000 | leuD | 2 | 1500000 | 320 | 0.0002133 | 0.0002037 |
1uM leuD | 1000 | leuD | 1 | 1600000 | 140 | 0.0000875 | 0.0001009 |
1uM leuD | 1000 | leuD | 2 | 1400000 | 160 | 0.0001143 | 0.0001009 |
100nM leuD | 100 | leuD | 1 | 1500000 | 480 | 0.0003200 | 0.0002475 |
100nM leuD | 100 | leuD | 2 | 1600000 | 280 | 0.0001750 | 0.0002475 |
10nM leuD | 10 | leuD | 1 | 1800000 | 80 | 0.0000444 | 0.0000568 |
10nM leuD | 10 | leuD | 2 | 2600000 | 180 | 0.0000692 | 0.0000568 |
pInt only | 0 | NA | 1 | 2900000 | 30 | 0.0000103 | 0.0000172 |
pInt only | 0 | NA | 2 | 1500000 | 36 | 0.0000240 | 0.0000172 |
Let’s plot the data:
# Calculate negative control value: the average efficiency of the
# "pInt only" condition.
to_conc_pInt <- (df_to_conc %>% filter(condition == "pInt only"))$avg_eff[1]

# Plot with individual observations, mean points and connecting lines.
# TO_conc == 0 is the control; it is excluded from the points and shown
# as the dashed line instead.
plot_to_conc <- ggplot(
  df_to_conc %>% filter(TO_conc > 0),
  aes(x = TO_conc, y = avg_eff, color = locus)
) +
  geom_hline(yintercept = to_conc_pInt, linetype = 2, color = "light gray") +
  geom_jitter(aes(y = eff), shape = 21, alpha = 0.4, width = 0.1, height = 0) +
  geom_point() +
  geom_line() +
  scale_x_log10(
    breaks = c(10, 100, 1000, 4000),
    labels = c("10 nM", "100 nM", "1 µM", "4 µM")
  ) +
  scale_color_viridis_d() +
  scale_y_continuous(
    labels = scales::label_percent(),
    trans = "log10",
    breaks = c(0.001, 0.0001, 0.00001),
    limits = c(0.00001, NA)
  ) +
  labs(y = "Efficiency", x = "Targeting oligo concentration")

plot_to_conc
This experiment tested the effect of how much integrating plasmid (pInt_attP1_kanR) was added to the ORBIT transformation. Let’s read in the data:
# Read the integrating-plasmid concentration data, then calculate the
# efficiency of each replicate and the average efficiency per condition.
# na.rm = TRUE lets a condition with a missing count still get an average.
df_pint_conc <- read_csv(
  "../../data/low_throughput_experiments/2022_03_31_pInt_conc_galK_hisA_data.csv"
) %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(pInt_ng, TO_added, locus) %>%
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  ungroup()

# Display the data as a scrollable table.
df_pint_conc %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "250px")
condition | pInt_ng | TO_added | locus | replicate | LB_count | Kan_count | eff | avg_eff |
---|---|---|---|---|---|---|---|---|
1ng pInt | 1 | FALSE | pInt_only | 1 | 1.02e+08 | 7 | 0.0000001 | 0.0000001 |
1ng pInt | 1 | FALSE | pInt_only | 2 | 7.00e+07 | 8 | 0.0000001 | 0.0000001 |
10ng pInt | 10 | FALSE | pInt_only | 1 | 6.60e+07 | 20 | 0.0000003 | 0.0000003 |
10ng pInt | 10 | FALSE | pInt_only | 2 | 7.90e+07 | 20 | 0.0000003 | 0.0000003 |
100ng pInt | 100 | FALSE | pInt_only | 1 | 6.80e+07 | 290 | 0.0000043 | 0.0000033 |
100ng pInt | 100 | FALSE | pInt_only | 2 | 7.20e+07 | 170 | 0.0000024 | 0.0000033 |
278ng pInt | 278 | FALSE | pInt_only | 1 | 7.20e+07 | 280 | 0.0000039 | 0.0000061 |
278ng pInt | 278 | FALSE | pInt_only | 2 | 6.20e+07 | 520 | 0.0000084 | 0.0000061 |
1ng pInt + p265 | 1 | TRUE | galK | 1 | 6.90e+07 | 510 | 0.0000074 | 0.0000099 |
1ng pInt + p265 | 1 | TRUE | galK | 2 | 6.40e+07 | 800 | 0.0000125 | 0.0000099 |
10ng pInt + p265 | 10 | TRUE | galK | 1 | 5.80e+07 | 4300 | 0.0000741 | 0.0001103 |
10ng pInt + p265 | 10 | TRUE | galK | 2 | 5.60e+07 | 8200 | 0.0001464 | 0.0001103 |
100ng pInt + p265 | 100 | TRUE | galK | 1 | 2.50e+07 | 35000 | 0.0014000 | 0.0016583 |
100ng pInt + p265 | 100 | TRUE | galK | 2 | 2.40e+07 | 46000 | 0.0019167 | 0.0016583 |
278ng pInt + p265 | 278 | TRUE | galK | 1 | 2.10e+07 | 75000 | 0.0035714 | 0.0028151 |
278ng pInt + p265 | 278 | TRUE | galK | 2 | 1.70e+07 | 35000 | 0.0020588 | 0.0028151 |
1ng pInt + p266 | 1 | TRUE | hisA | 1 | 3.10e+07 | 940 | 0.0000303 | 0.0000311 |
1ng pInt + p266 | 1 | TRUE | hisA | 2 | 2.60e+07 | 830 | 0.0000319 | 0.0000311 |
10ng pInt + p266 | 10 | TRUE | hisA | 1 | 7.00e+07 | 22000 | 0.0003143 | 0.0003264 |
10ng pInt + p266 | 10 | TRUE | hisA | 2 | 6.50e+07 | 22000 | 0.0003385 | 0.0003264 |
100ng pInt + p266 | 100 | TRUE | hisA | 1 | 6.10e+07 | 97000 | 0.0015902 | 0.0015902 |
100ng pInt + p266 | 100 | TRUE | hisA | 2 | 5.60e+07 | NA | NA | 0.0015902 |
278ng pInt + p266 | 278 | TRUE | hisA | 1 | 5.20e+07 | 180000 | 0.0034615 | 0.0054808 |
278ng pInt + p266 | 278 | TRUE | hisA | 2 | 5.60e+07 | 420000 | 0.0075000 | 0.0054808 |
Note that the second hisA 100ng data point is NA because it came back as zero colonies on the plate, which was an obvious error.
# Plot individual replicates, mean points and connecting lines for each
# locus and the pInt_only control.
plot_pint_conc <- ggplot(
  df_pint_conc,
  aes(x = pInt_ng, y = eff, color = locus)
) +
  # One row per condition, so the path connects the condition averages.
  geom_path(
    data = df_pint_conc %>%
      group_by(pInt_ng, TO_added, locus, avg_eff) %>%
      summarise(.groups = "drop"),
    aes(x = pInt_ng, y = avg_eff, group = locus),
    linewidth = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # Draw each average once by keeping a single replicate per condition.
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  # Values and labels are positional and follow the order of the locus
  # values (galK, hisA, pInt_only).
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", "light gray"),
    labels = c("galK", "hisA", "control")
  ) +
  scale_x_log10() +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.0001)) +
  # Only `color` is mapped, so the legend title belongs to the color scale.
  labs(
    x = "Integrating plasmid concentration (ng) ",
    y = "Efficiency",
    color = "Locus"
  )

plot_pint_conc
This experiment tested the effect of arabinose (bxb-1 inducer for pHelper-Ec1-gentR) in a 1 hr recovery culture. Let’s read in the data:
# Read the arabinose level data, then calculate the efficiency of each
# replicate and the average efficiency per arabinose level and locus.
df_ara <- read_csv(
  "../../data/low_throughput_experiments/2022_06_21_arabinose_levels_data.csv"
) %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(arabinose_per, locus) %>%
  mutate(avg_eff = mean(eff)) %>%
  ungroup()

# Display the data as a scrollable table.
df_ara %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "250px")
condition | arabinose_per | TO | locus | replicate | LB_count | Kan_count | eff | avg_eff |
---|---|---|---|---|---|---|---|---|
0% arabinose | 0.00 | p265 | galK | 1 | 430000 | 370 | 0.0008605 | 0.0006352 |
0% arabinose | 0.00 | p265 | galK | 2 | 610000 | 250 | 0.0004098 | 0.0006352 |
0.01 % arabinose | 0.01 | p265 | galK | 1 | 680000 | 550 | 0.0008088 | 0.0008317 |
0.01 % arabinose | 0.01 | p265 | galK | 2 | 550000 | 470 | 0.0008545 | 0.0008317 |
0.1 % arabinose | 0.10 | p265 | galK | 1 | 440000 | 960 | 0.0021818 | 0.0018556 |
0.1 % arabinose | 0.10 | p265 | galK | 2 | 510000 | 780 | 0.0015294 | 0.0018556 |
1 % arabinose | 1.00 | p265 | galK | 1 | 550000 | 1490 | 0.0027091 | 0.0022013 |
1 % arabinose | 1.00 | p265 | galK | 2 | 620000 | 1050 | 0.0016935 | 0.0022013 |
0% arabinose | 0.00 | p266 | hisA | 1 | 440000 | 93 | 0.0002114 | 0.0002514 |
0% arabinose | 0.00 | p266 | hisA | 2 | 470000 | 137 | 0.0002915 | 0.0002514 |
0.01 % arabinose | 0.01 | p266 | hisA | 1 | 560000 | 390 | 0.0006964 | 0.0012019 |
0.01 % arabinose | 0.01 | p266 | hisA | 2 | 410000 | 700 | 0.0017073 | 0.0012019 |
0.1 % arabinose | 0.10 | p266 | hisA | 1 | 600000 | 790 | 0.0013167 | 0.0017833 |
0.1 % arabinose | 0.10 | p266 | hisA | 2 | 520000 | 1170 | 0.0022500 | 0.0017833 |
1 % arabinose | 1.00 | p266 | hisA | 1 | 540000 | 1260 | 0.0023333 | 0.0022768 |
1 % arabinose | 1.00 | p266 | hisA | 2 | 590000 | 1310 | 0.0022203 | 0.0022768 |
0% arabinose | 0.00 | pInt only | pInt_only | 1 | 500000 | 6 | 0.0000120 | 0.0000122 |
0% arabinose | 0.00 | pInt only | pInt_only | 2 | 560000 | 7 | 0.0000125 | 0.0000122 |
0.01 % arabinose | 0.01 | pInt only | pInt_only | 1 | 560000 | 7 | 0.0000125 | 0.0000138 |
0.01 % arabinose | 0.01 | pInt only | pInt_only | 2 | 530000 | 8 | 0.0000151 | 0.0000138 |
0.1 % arabinose | 0.10 | pInt only | pInt_only | 1 | 570000 | 8 | 0.0000140 | 0.0000144 |
0.1 % arabinose | 0.10 | pInt only | pInt_only | 2 | 610000 | 9 | 0.0000148 | 0.0000144 |
1 % arabinose | 1.00 | pInt only | pInt_only | 1 | 470000 | 20 | 0.0000426 | 0.0000391 |
1 % arabinose | 1.00 | pInt only | pInt_only | 2 | 450000 | 16 | 0.0000356 | 0.0000391 |
Now let’s plot:
# Plot individual replicates, mean points and connecting lines for each
# locus and the pInt_only control. Arabinose levels are treated as
# discrete categories on the x axis.
plot_ara <- ggplot(
  df_ara,
  aes(x = factor(arabinose_per), y = eff, color = locus)
) +
  # One row per condition, so the path connects the condition averages.
  geom_path(
    data = df_ara %>%
      group_by(arabinose_per, locus, avg_eff) %>%
      summarise(.groups = "drop"),
    aes(x = factor(arabinose_per), y = avg_eff, group = locus),
    linewidth = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # Draw each average once by keeping a single replicate per condition.
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  # Values and labels are positional and follow the order of the locus
  # values (galK, hisA, pInt_only).
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", "light gray"),
    labels = c("galK", "hisA", "control")
  ) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.001)) +
  # Only `color` is mapped, so the legend title belongs to the color scale.
  labs(x = "Arabinose %", y = "Efficiency", color = "Locus")

plot_ara
This experiment tested the effect of recovering in arabinose for different periods of time following the ORBIT transformation.
# Read the recovery time data, then calculate the efficiency of each
# replicate and the average efficiency per recovery time and locus.
df_timing <- read_csv(
  "../../data/low_throughput_experiments/2022_07_13_recovery_time_data.csv"
) %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(rec_time, locus) %>%
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  ungroup()

# Display the data as a scrollable table.
df_timing %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "250px")
condition | rec_time | locus | replicate | LB_count | Kan_count | eff | avg_eff |
---|---|---|---|---|---|---|---|
p265 0hr | 0.0 | galK | 1 | 58000 | 0 | 0.0000000 | 0.0000000 |
p265 0hr | 0.0 | galK | 2 | 62000 | 0 | 0.0000000 | 0.0000000 |
p265 30min | 0.5 | galK | 1 | 85000 | 46 | 0.0005412 | 0.0008004 |
p265 30min | 0.5 | galK | 2 | 84000 | 89 | 0.0010595 | 0.0008004 |
p265 1hr | 1.0 | galK | 1 | 69000 | 270 | 0.0039130 | 0.0043069 |
p265 1hr | 1.0 | galK | 2 | 117000 | 550 | 0.0047009 | 0.0043069 |
p265 3hr | 3.0 | galK | 1 | 3700000 | 1700 | 0.0004595 | 0.0005538 |
p265 3hr | 3.0 | galK | 2 | 5400000 | 3500 | 0.0006481 | 0.0005538 |
p265 6hr | 6.0 | galK | 1 | 6800000 | 2600 | 0.0003824 | 0.0004700 |
p265 6hr | 6.0 | galK | 2 | 5200000 | 2900 | 0.0005577 | 0.0004700 |
p266 0hr | 0.0 | hisA | 1 | 64000 | 0 | 0.0000000 | 0.0000000 |
p266 0hr | 0.0 | hisA | 2 | 61000 | 0 | 0.0000000 | 0.0000000 |
p266 30min | 0.5 | hisA | 1 | 320000 | 40 | 0.0001250 | 0.0001654 |
p266 30min | 0.5 | hisA | 2 | 340000 | 70 | 0.0002059 | 0.0001654 |
p266 1hr | 1.0 | hisA | 1 | 360000 | 590 | 0.0016389 | 0.0012694 |
p266 1hr | 1.0 | hisA | 2 | 400000 | 360 | 0.0009000 | 0.0012694 |
p266 3hr | 3.0 | hisA | 1 | 3200000 | 4600 | 0.0014375 | 0.0010187 |
p266 3hr | 3.0 | hisA | 2 | 6000000 | 3600 | 0.0006000 | 0.0010187 |
p266 6hr | 6.0 | hisA | 1 | 5500000 | 3700 | 0.0006727 | 0.0006248 |
p266 6hr | 6.0 | hisA | 2 | 5200000 | 3000 | 0.0005769 | 0.0006248 |
pInt 0hr | 0.0 | pInt_only | 1 | 56000 | 0 | 0.0000000 | 0.0000000 |
pInt 0hr | 0.0 | pInt_only | 2 | 47000 | 0 | 0.0000000 | 0.0000000 |
pInt 30min | 0.5 | pInt_only | 1 | 310000 | 1 | 0.0000032 | 0.0000035 |
pInt 30min | 0.5 | pInt_only | 2 | 260000 | 1 | 0.0000038 | 0.0000035 |
pInt 1hr | 1.0 | pInt_only | 1 | 370000 | 8 | 0.0000216 | 0.0000150 |
pInt 1hr | 1.0 | pInt_only | 2 | 360000 | 3 | 0.0000083 | 0.0000150 |
pInt 3hr | 3.0 | pInt_only | 1 | 4900000 | 21 | 0.0000043 | 0.0000048 |
pInt 3hr | 3.0 | pInt_only | 2 | 4000000 | 21 | 0.0000052 | 0.0000048 |
pInt 6hr | 6.0 | pInt_only | 1 | 6000000 | 10 | 0.0000017 | 0.0000018 |
pInt 6hr | 6.0 | pInt_only | 2 | 4800000 | 9 | 0.0000019 | 0.0000018 |
As you can see from the table, no colonies were recovered at zero hours in any of the conditions. These points are not visible on the log scale of the plot, but they were indeed measured. Let’s plot:
# Plot individual replicates, mean points and connecting lines for each
# locus and the pInt_only control. Recovery times are treated as discrete
# categories on the x axis. Zero-efficiency values cannot be drawn on the
# log10 y axis.
plot_timing <- ggplot(
  df_timing,
  aes(x = factor(rec_time), y = eff, color = locus)
) +
  # One row per condition, so the path connects the condition averages.
  geom_path(
    data = df_timing %>%
      group_by(rec_time, locus, avg_eff) %>%
      summarise(.groups = "drop"),
    aes(x = factor(rec_time), y = avg_eff, group = locus),
    linewidth = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # Draw each average once by keeping a single replicate per condition.
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  # Values and labels are positional and follow the order of the locus
  # values (galK, hisA, pInt_only).
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", "light gray"),
    labels = c("galK", "hisA", "control")
  ) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.001)) +
  # Labels are positional: they follow the sorted rec_time levels
  # (0, 0.5, 1, 3, 6).
  scale_x_discrete(labels = c("0 min", "30 min", "1 hr", "3 hr", "6 hr")) +
  # Only `color` is mapped, so the legend title belongs to the color scale.
  labs(x = "Recovery time", y = "Efficiency", color = "Locus")

plot_timing
# Switch to the figure theme before assembling the final figure.
theme_set(theme_figure())

# Panel B: induction-condition plot stacked on its +/- legend.
plot_cond_legend <- plot_grid(
  plot_cond, plot_cond_labs,
  ncol = 1, rel_heights = c(3, 1), rel_widths = c(1, 1),
  align = "hv", axis = "lr", scale = 0.9
)

# Panels C-H: the remaining optimization plots in a two-column grid, with
# their legends suppressed. Labels are assigned in argument order.
fig_2_bottom <- plot_grid(
  plot_to_len + guides(color = "none", fill = "none"),
  plot_lag + guides(shape = "none"),
  plot_to_conc + guides(color = "none", fill = "none"),
  plot_pint_conc + guides(color = "none"),
  plot_ara + guides(color = "none"),
  plot_timing + guides(color = "none"),
  ncol = 2, rel_heights = c(1, 1), rel_widths = c(1, 1),
  align = "hv", axis = "lr", scale = 0.9,
  labels = c("C", "D", "E", "F", "G", "H")
)

# Full figure: panel B on top, panels C-H below.
fig_2 <- plot_grid(
  plot_cond_legend, fig_2_bottom,
  ncol = 1, rel_heights = c(1, 3), scale = 1.0, labels = c("B")
)

fig_2

# Export the figure as a PDF.
save_plot(
  "../../figures/r_pdf_figs/main_figs/fig_2_optimization.pdf",
  fig_2,
  base_width = 7, base_height = 7
)

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.3.4 cowplot_1.1.1 viridis_0.6.2 viridisLite_0.4.1
## [5] knitr_1.41 forcats_0.5.2 stringr_1.5.0 dplyr_1.1.0
## [9] purrr_0.3.5 readr_2.1.3 tidyr_1.2.1 tibble_3.1.8
## [13] ggplot2_3.4.0 tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.4 sass_0.4.4 bit64_4.0.5
## [4] vroom_1.6.0 jsonlite_1.8.3 modelr_0.1.10
## [7] bslib_0.4.1 assertthat_0.2.1 highr_0.9
## [10] googlesheets4_1.0.1 cellranger_1.1.0 yaml_2.3.6
## [13] pillar_1.8.1 backports_1.4.1 glue_1.6.2
## [16] digest_0.6.30 rvest_1.0.3 colorspace_2.0-3
## [19] htmltools_0.5.4 pkgconfig_2.0.3 broom_1.0.1
## [22] haven_2.5.1 scales_1.2.1 webshot_0.5.4
## [25] svglite_2.1.0 tzdb_0.3.0 timechange_0.1.1
## [28] googledrive_2.0.0 generics_0.1.3 farver_2.1.1
## [31] ellipsis_0.3.2 cachem_1.0.6 withr_2.5.0
## [34] cli_3.4.1 magrittr_2.0.3 crayon_1.5.2
## [37] readxl_1.4.1 evaluate_0.18 fs_1.5.2
## [40] fansi_1.0.3 xml2_1.3.3 textshaping_0.3.6
## [43] tools_4.2.0 hms_1.1.2 gargle_1.2.1
## [46] lifecycle_1.0.3 munsell_0.5.0 reprex_2.0.2
## [49] compiler_4.2.0 jquerylib_0.1.4 systemfonts_1.0.4
## [52] rlang_1.0.6 grid_4.2.0 rstudioapi_0.14
## [55] labeling_0.4.2 rmarkdown_2.18 gtable_0.3.1
## [58] DBI_1.1.3 R6_2.5.1 gridExtra_2.3
## [61] lubridate_1.9.0 fastmap_1.1.0 bit_4.0.5
## [64] utf8_1.2.2 ragg_1.2.5 stringi_1.7.8
## [67] parallel_4.2.0 vctrs_0.5.2 dbplyr_2.2.1
## [70] tidyselect_1.2.0 xfun_0.35