Notes

This figure contains data from many different experiments that were used to optimize the protocol for ORBIT. Data figures were made in R notebooks and exported as pdfs. Cosmetic improvements were made in Adobe Illustrator. Note that Figure 2A is a diagram that was made in Adobe Illustrator.


Setup packages and plotting for the notebook:

# Check packages
source("../tools/package_setup.R")

# Load packages
library(tidyverse)
library(cowplot)
library(kableExtra)

# Default chunk options for the rendered notebook: echo the code, hide
# messages and warnings, and center figures.
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 60),
  tidy = FALSE,
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  fig.align = "center",
  fig.retina = 2
)

# Pull in the shared plotting tools
source("../tools/plotting_tools.R")

# Use the notebook theme as the default for the plots below
theme_set(theme_notebook())

Fig. 2B - Helper plasmid induction conditions

This experiment used a ∆galK targeting oligo with a pInt_kanR integrating plasmid and tested different helper plasmid induction schemes. Let’s first read in the data.

# Read the helper plasmid induction condition data
df_cond <- read_csv("../../data/low_throughput_experiments/2021_07_01_tol_ara_params.csv")

# Show the full table in a scrollable box
df_cond %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = '250px')
cond_name cond_id pre_tol pre_ara post_tol post_ara rep LB Kan Kan_LB
uninduced - - 1 FALSE FALSE FALSE FALSE 1 3000000 22 0.0000073
uninduced - - 1 FALSE FALSE FALSE FALSE 2 3900000 13 0.0000033
uninduced - - 1 FALSE FALSE FALSE FALSE 3 5000000 5 0.0000010
uninduced - - 1 FALSE FALSE FALSE FALSE NC 3800000 8 0.0000021
uninduced - + 2 FALSE FALSE FALSE TRUE 1 3000000 20 0.0000067
uninduced - + 2 FALSE FALSE FALSE TRUE 2 2600000 10 0.0000038
uninduced - + 2 FALSE FALSE FALSE TRUE 3 2000000 16 0.0000080
uninduced - + 2 FALSE FALSE FALSE TRUE NC 2400000 15 0.0000063
uninduced + + 3 FALSE FALSE TRUE TRUE 1 330000 4 0.0000121
uninduced + + 3 FALSE FALSE TRUE TRUE 2 420000 5 0.0000119
uninduced + + 3 FALSE FALSE TRUE TRUE 3 320000 6 0.0000187
uninduced + + 3 FALSE FALSE TRUE TRUE NC 250000 8 0.0000320
tol induced - - 4 TRUE FALSE FALSE FALSE 1 3400000 3200 0.0009412
tol induced - - 4 TRUE FALSE FALSE FALSE 2 3000000 2800 0.0009333
tol induced - - 4 TRUE FALSE FALSE FALSE 3 1700000 1600 0.0009412
tol induced - - 4 TRUE FALSE FALSE FALSE NC 2100000 9 0.0000043
tol induced - + 5 TRUE FALSE FALSE TRUE 1 1760000 8300 0.0047159
tol induced - + 5 TRUE FALSE FALSE TRUE 2 2400000 7100 0.0029583
tol induced - + 5 TRUE FALSE FALSE TRUE 3 2600000 5000 0.0019231
tol induced - + 5 TRUE FALSE FALSE TRUE NC 3400000 8 0.0000024
tol induced + + 6 TRUE FALSE TRUE TRUE 1 210000 63 0.0003000
tol induced + + 6 TRUE FALSE TRUE TRUE 2 110000 45 0.0004091
tol induced + + 6 TRUE FALSE TRUE TRUE 3 260000 90 0.0003462
tol induced + + 6 TRUE FALSE TRUE TRUE NC 180000 9 0.0000500
tol + ara induced - - 7 TRUE TRUE FALSE FALSE 1 2600000 2400 0.0009231
tol + ara induced - - 7 TRUE TRUE FALSE FALSE 2 3100000 5600 0.0018065
tol + ara induced - - 7 TRUE TRUE FALSE FALSE 3 2900000 3900 0.0013448
tol + ara induced - - 7 TRUE TRUE FALSE FALSE NC 1100000 33 0.0000300
tol + ara induced - + 8 TRUE TRUE FALSE TRUE 1 890000 6400 0.0071910
tol + ara induced - + 8 TRUE TRUE FALSE TRUE 2 1900000 10200 0.0053684
tol + ara induced - + 8 TRUE TRUE FALSE TRUE 3 2000000 6600 0.0033000
tol + ara induced - + 8 TRUE TRUE FALSE TRUE NC 2400000 56 0.0000233
tol + ara induced + + 9 TRUE TRUE TRUE TRUE 1 150000 150 0.0010000
tol + ara induced + + 9 TRUE TRUE TRUE TRUE 2 140000 160 0.0011429
tol + ara induced + + 9 TRUE TRUE TRUE TRUE 3 190000 90 0.0004737
tol + ara induced + + 9 TRUE TRUE TRUE TRUE NC 40000 6 0.0001500

Now let’s plot the data. Note that the labeling of each condition gets complicated, so we made a special legend with + / - for each possible induction.

# Plot individual replicates, mean points / crossbars, and negative control Xs.
# NOTE(review): scale_y_log10() transforms the data before stat_summary() is
# evaluated, so the summary "mean" below is taken over log10 values (i.e. it is
# a geometric mean on the original scale).
plot_cond <- ggplot(df_cond %>% filter(rep != 'NC'), aes(x = factor(cond_id), y = Kan_LB)) +
  # Negative controls (rep == 'NC') are drawn as light gray Xs
  geom_point(data = df_cond %>% filter(rep == 'NC'), shape = 4, color = 'light gray') +
  geom_jitter(width = 0.1, height = 0, shape = 21, alpha = 0.4, color = "#440154FF") +
  stat_summary(fun = 'mean', geom = 'crossbar', width = 0.5, size = 0.25, color = "#440154FF") +
  # `width` is not a point parameter, so it is not passed to this layer
  stat_summary(fun = 'mean', geom = 'point', color = "#440154FF") +
  scale_y_log10(
    labels = scales::label_percent(accuracy = 0.0001),
    breaks = c(0.000001, 0.0001, 0.01)
  ) +
  # Condition labels are supplied by the +/- legend panel built below
  scale_x_discrete(labels = NULL) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = NULL, y = 'Efficiency')

# Build the +/- legend: one column per condition, one row per inducer.
# `rep` is a character column (it also holds 'NC'), so compare against '1'.
plot_cond_labs <- df_cond %>%
  filter(rep == '1') %>%
  select(cond_id, pre_tol, pre_ara, post_tol, post_ara) %>%
  pivot_longer(
    cols = c('pre_tol', 'pre_ara', 'post_tol', 'post_ara'),
    names_to = 'inducer'
  ) %>%
  mutate(value = ifelse(value == TRUE, '+', '-')) %>%
  ggplot(aes(x = factor(cond_id), y = inducer, label = value)) +
  geom_text() +
  scale_y_discrete(
    limits = c('post_ara', 'pre_ara', 'post_tol', 'pre_tol'),
    labels = c('post ara', 'pre ara', 'post tol', 'pre tol')
  ) +
  labs(x = NULL, y = NULL) +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank()
  )

# Stack the data panel on top of the +/- legend, aligned on the left/right axes
plot_cond_legend <- plot_grid(plot_cond, plot_cond_labs, ncol = 1, rel_heights = c(4,1), rel_widths = c(1,1),
                   align = 'hv', axis = 'lr', scale = 1.0)
plot_cond_legend

Fig. 2C - Targeting oligo length

This experiment tested the effect of targeting oligo length. Identical oligos were used for the four different loci, but their homology arms varied in length. The total length of the oligo is reported here, which includes both homology arms and the 38 bp attB site. Let’s read in the data.

# Read the targeting oligo length data
df_len <- read_csv("../../data/low_throughput_experiments/2022_02_15_orbit_TO_len_data.csv")

# Efficiency is the ratio of the Kan count to the LB count
df_len <- df_len %>% mutate(eff = Kan / LB)

# Show the full table in a scrollable box
df_len %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = '250px')
targeting_oligo gene TO_len replicate LB Kan eff
269 galK 74 1 530000 190 0.0003585
269 galK 74 2 860000 150 0.0001744
269 galK 74 3 1120000 220 0.0001964
264 galK 90 1 970000 28000 0.0288660
264 galK 90 2 400000 7900 0.0197500
264 galK 90 3 930000 9200 0.0098925
270 galK 104 1 920000 25000 0.0271739
270 galK 104 2 940000 22000 0.0234043
270 galK 104 3 1090000 15000 0.0137615
265 galK 120 1 1140000 24000 0.0210526
265 galK 120 2 910000 23000 0.0252747
265 galK 120 3 950000 18000 0.0189474
271 hisA 74 1 810000 230 0.0002840
271 hisA 74 2 890000 150 0.0001685
271 hisA 74 3 1010000 210 0.0002079
272 hisA 90 1 640000 1800 0.0028125
272 hisA 90 2 500000 2400 0.0048000
272 hisA 90 3 820000 3000 0.0036585
273 hisA 104 1 750000 17000 0.0226667
273 hisA 104 2 1130000 24000 0.0212389
273 hisA 104 3 770000 20000 0.0259740
266 hisA 120 1 1900000 27000 0.0142105
266 hisA 120 2 960000 31000 0.0322917
266 hisA 120 3 810000 40000 0.0493827
274 metA 74 1 820000 80 0.0000976
274 metA 74 2 740000 230 0.0003108
274 metA 74 3 740000 19 0.0000257
275 metA 90 1 710000 800 0.0011268
275 metA 90 2 870000 850 0.0009770
275 metA 90 3 790000 580 0.0007342
276 metA 104 1 600000 1200 0.0020000
276 metA 104 2 790000 2500 0.0031646
276 metA 104 3 850000 3400 0.0040000
267 metA 120 1 840000 10700 0.0127381
267 metA 120 2 730000 12800 0.0175342
267 metA 120 3 790000 10200 0.0129114
277 leuD 74 1 1300000 76 0.0000585
277 leuD 74 2 950000 78 0.0000821
277 leuD 74 3 760000 64 0.0000842
278 leuD 90 1 810000 1300 0.0016049
278 leuD 90 2 940000 1260 0.0013404
278 leuD 90 3 750000 1270 0.0016933
279 leuD 104 1 740000 1700 0.0022973
279 leuD 104 2 720000 2200 0.0030556
279 leuD 104 3 1030000 1800 0.0017476
268 leuD 120 1 670000 2800 0.0041791
268 leuD 120 2 570000 1770 0.0031053
268 leuD 120 3 710000 1600 0.0022535
pInt_only NA NA 1 1090000 300 0.0002752
pInt_only NA NA 2 1190000 250 0.0002101
pInt_only NA NA 3 1090000 130 0.0001193

Let’s plot the data.

# Calculate condition means and standard deviations.
# `.groups = 'drop'` returns an ungrouped tibble rather than one that is
# still grouped by gene.
df_len_summary <- df_len %>%
  filter(targeting_oligo != 'pInt_only') %>%
  group_by(gene, TO_len) %>%
  summarise(mean = mean(eff), sd = sd(eff), .groups = 'drop')

# Get negative control value (mean efficiency of the pInt_only replicates)
df_len_control <- df_len %>% filter(targeting_oligo == 'pInt_only')
to_len_pInt <- mean(df_len_control$eff)

# Plot individual replicates, mean points, and connecting lines.
# The pInt_only rows have no gene / TO_len, so they are excluded from the
# plotted data explicitly; the control is shown as the dashed horizontal line.
plot_to_len <- ggplot(
  df_len %>% filter(targeting_oligo != 'pInt_only'),
  aes(x = TO_len, y = eff, color = gene)
) +
  geom_hline(yintercept = to_len_pInt, color = 'gray', linetype = 'dashed') +
  geom_jitter(shape = 21, alpha = 0.4, width = 1, height = 0) +
  geom_point(
    data = df_len_summary,
    aes(y = mean),
    position = position_jitter(height = 0, width = 0.1)
  ) +
  geom_line(data = df_len_summary, aes(y = mean)) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.01)) +
  scale_x_continuous(breaks = c(74, 90, 104, 120)) +
  # Fix the gene order so colors match the other panels
  scale_colour_viridis_d(limits = c('galK', 'hisA', 'metA', 'leuD')) +
  labs(x = "Targeting oligo length (nt)", y = "Efficiency", color = NULL)

plot_to_len

Fig. 2D - Leading vs. Lagging strand TO

This experiment tested targeting oligos binding the leading or lagging strand at each locus. 120 nt TOs were used. Let’s read in the data:

# Read the leading / lagging strand data, then calculate the efficiency of
# each replicate and the average efficiency for each locus and strand.
df_lag <- read_csv('../../data/low_throughput_experiments/2022_03_07_leading_lagging_data.csv') %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(locus, strand) %>%
  mutate(avg_eff = mean(eff)) %>%
  # Drop the grouping before recoding the grouping columns, so the result is
  # a plain tibble and no grouping leaks into later steps
  ungroup() %>%
  # Fix the display order of loci and strands
  mutate(
    locus = factor(locus, levels = c('galK', 'hisA', 'metA', 'leuD', 'pInt only')),
    strand = factor(strand, levels = c('leading', 'lagging', 'none'))
  )

df_lag %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')
locus strand replicate LB_count Kan_count eff avg_eff
galK lagging 1 23000000 130000 0.0056522 0.0061019
galK lagging 2 29000000 190000 0.0065517 0.0061019
galK leading 1 27000000 310 0.0000115 0.0000448
galK leading 2 7300000 570 0.0000781 0.0000448
hisA lagging 1 30000000 460000 0.0153333 0.0147037
hisA lagging 2 27000000 380000 0.0140741 0.0147037
hisA leading 1 27000000 670 0.0000248 0.0000322
hisA leading 2 22000000 870 0.0000395 0.0000322
metA lagging 1 1140000 13500 0.0118421 0.0093622
metA lagging 2 1700000 11700 0.0068824 0.0093622
metA leading 1 1700000 1260 0.0007412 0.0009649
metA leading 2 1220000 1450 0.0011885 0.0009649
leuD lagging 1 970000 3400 0.0035052 0.0036507
leuD lagging 2 1080000 4100 0.0037963 0.0036507
leuD leading 1 1040000 330 0.0003173 0.0002857
leuD leading 2 1220000 310 0.0002541 0.0002857
pInt only none 1 1300000 153 0.0001177 0.0000808
pInt only none 2 2500000 110 0.0000440 0.0000808

Now let’s plot:

# Get negative control value: average efficiency of the rows with no
# targeting oligo strand (strand == 'none')
lag_pInt <- (df_lag %>% filter(strand == 'none'))$avg_eff[1]

# Plot individual replicates, mean points and crossbars and negative control values.
# NOTE(review): scale_y_log10() transforms the data before stat_summary() is
# evaluated, so the summary "mean" is taken over log10 values, whereas
# `lag_pInt` (dashed line) is an arithmetic mean. Confirm this is intended.
plot_lag <- ggplot(df_lag %>% filter(strand != 'none'), aes(x = strand, y = eff, color = locus)) +
  geom_hline(yintercept = lag_pInt, color = 'light gray', linetype = 2) +
  geom_point(position = position_dodge(width = 1), alpha = 0.4, fill = NA, shape = 21) +
  stat_summary(
    fun = 'mean', geom = 'crossbar',
    position = position_dodge(width = 1), width = 0.5, size = 0.25
  ) +
  # `width` is not a point parameter, so it is not passed to this layer
  stat_summary(fun = 'mean', geom = 'point', position = position_dodge(width = 1)) +
  facet_grid(~locus) +
  scale_color_viridis_d() +
  scale_y_log10(
    labels = scales::label_percent(accuracy = 0.001),
    breaks = c(0.0001, 0.001, 0.01)
  ) +
  # Labels follow the factor level order of `strand`: leading, lagging
  scale_x_discrete(labels = c('lead', 'lag')) +
  labs(y = 'Efficiency', x = 'Targeting oligo strand', color = NULL) +
  # Each facet is already titled with its locus, so the color legend is dropped
  guides(color = 'none') +
  theme(panel.border = element_rect(color = 'black', fill = NA))

plot_lag

Fig. 2E - Targeting oligo concentration

This experiment tested the effect of TO concentration (final in 50 µL cell aliquots). Standard 120 nt TOs were used for each locus. Let’s read in the data:

# Read the targeting oligo concentration data, then calculate the efficiency
# of each replicate and the average efficiency for each condition.
df_to_conc <- read_csv("../../data/low_throughput_experiments/2022_09_28_TO_conc_4_loci_data.csv") %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(TO_conc, locus, condition) %>%
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  # Drop the grouping before recoding the grouping column `locus`, so the
  # result is a plain tibble and no grouping leaks into later steps
  ungroup() %>%
  # Fix the display order of the loci
  mutate(locus = factor(locus, levels = c('galK', 'hisA', 'metA', 'leuD')))


df_to_conc %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')
condition TO_conc locus replicate LB_count Kan_count eff avg_eff
4uM galK 4000 galK 1 1300000 570 0.0004385 0.0003604
4uM galK 4000 galK 2 1700000 480 0.0002824 0.0003604
1uM galK 1000 galK 1 1700000 820 0.0004824 0.0004252
1uM galK 1000 galK 2 2500000 920 0.0003680 0.0004252
100nM galK 100 galK 1 2400000 900 0.0003750 0.0005486
100nM galK 100 galK 2 1800000 1300 0.0007222 0.0005486
10nM galK 10 galK 1 1100000 560 0.0005091 0.0002962
10nM galK 10 galK 2 2400000 200 0.0000833 0.0002962
4uM hisA 4000 hisA 1 1700000 1230 0.0007235 0.0008618
4uM hisA 4000 hisA 2 1000000 1000 0.0010000 0.0008618
1uM hisA 1000 hisA 1 1610000 730 0.0004534 0.0002737
1uM hisA 1000 hisA 2 1160000 109 0.0000940 0.0002737
100nM hisA 100 hisA 1 2400000 1600 0.0006667 0.0011333
100nM hisA 100 hisA 2 2500000 4000 0.0016000 0.0011333
10nM hisA 10 hisA 1 2500000 830 0.0003320 0.0007035
10nM hisA 10 hisA 2 1200000 1290 0.0010750 0.0007035
4uM metA 4000 metA 1 2000000 860 0.0004300 0.0004885
4uM metA 4000 metA 2 1700000 930 0.0005471 0.0004885
1uM metA 1000 metA 1 2400000 1800 0.0007500 0.0005950
1uM metA 1000 metA 2 1500000 660 0.0004400 0.0005950
100nM metA 100 metA 1 2200000 1830 0.0008318 0.0010945
100nM metA 100 metA 2 2800000 3800 0.0013571 0.0010945
10nM metA 10 metA 1 1100000 610 0.0005545 0.0007523
10nM metA 10 metA 2 1000000 950 0.0009500 0.0007523
4uM leuD 4000 leuD 1 1700000 330 0.0001941 0.0002037
4uM leuD 4000 leuD 2 1500000 320 0.0002133 0.0002037
1uM leuD 1000 leuD 1 1600000 140 0.0000875 0.0001009
1uM leuD 1000 leuD 2 1400000 160 0.0001143 0.0001009
100nM leuD 100 leuD 1 1500000 480 0.0003200 0.0002475
100nM leuD 100 leuD 2 1600000 280 0.0001750 0.0002475
10nM leuD 10 leuD 1 1800000 80 0.0000444 0.0000568
10nM leuD 10 leuD 2 2600000 180 0.0000692 0.0000568
pInt only 0 NA 1 2900000 30 0.0000103 0.0000172
pInt only 0 NA 2 1500000 36 0.0000240 0.0000172

Let’s plot the data:

# Negative control value: average efficiency of the 'pInt only' condition
to_conc_pInt <- filter(df_to_conc, condition == 'pInt only')$avg_eff[1]

# Individual observations (open circles), mean points and connecting lines;
# the negative control is drawn as a dashed horizontal line
plot_to_conc <- df_to_conc %>%
  filter(TO_conc > 0) %>%
  ggplot(aes(x = TO_conc, y = avg_eff, color = locus)) +
  geom_hline(yintercept = to_conc_pInt, linetype = 2, color = 'light gray') +
  geom_jitter(aes(y = eff), shape = 21, alpha = 0.4, width = 0.1, height = 0) +
  geom_point() +
  geom_line() +
  scale_x_log10(
    breaks = c(10, 100, 1000, 4000),
    labels = c('10 nM', '100 nM', '1 µM', '4 µM')
  ) +
  scale_color_viridis_d() +
  scale_y_log10(
    labels = scales::label_percent(),
    breaks = c(0.001, 0.0001, 0.00001),
    limits = c(0.00001, NA)
  ) +
  labs(y = 'Efficiency', x = 'Targeting oligo concentration')

plot_to_conc

Fig. 2F - Integrating plasmid added

This experiment tested the effect of how much integrating plasmid (pInt_attP1_kanR) was added to the ORBIT transformation. Let’s read in the data:

# Read the integrating plasmid concentration data, then calculate the
# efficiency of each replicate and the average efficiency for each condition.
df_pint_conc <- read_csv('../../data/low_throughput_experiments/2022_03_31_pInt_conc_galK_hisA_data.csv') %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(pInt_ng, TO_added, locus) %>%
  # na.rm = TRUE so a missing replicate does not turn the whole average into NA
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  # Return a plain tibble so no grouping leaks into later steps
  ungroup()

df_pint_conc %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')
condition pInt_ng TO_added locus replicate LB_count Kan_count eff avg_eff
1ng pInt 1 FALSE pInt_only 1 1.02e+08 7 0.0000001 0.0000001
1ng pInt 1 FALSE pInt_only 2 7.00e+07 8 0.0000001 0.0000001
10ng pInt 10 FALSE pInt_only 1 6.60e+07 20 0.0000003 0.0000003
10ng pInt 10 FALSE pInt_only 2 7.90e+07 20 0.0000003 0.0000003
100ng pInt 100 FALSE pInt_only 1 6.80e+07 290 0.0000043 0.0000033
100ng pInt 100 FALSE pInt_only 2 7.20e+07 170 0.0000024 0.0000033
278ng pInt 278 FALSE pInt_only 1 7.20e+07 280 0.0000039 0.0000061
278ng pInt 278 FALSE pInt_only 2 6.20e+07 520 0.0000084 0.0000061
1ng pInt + p265 1 TRUE galK 1 6.90e+07 510 0.0000074 0.0000099
1ng pInt + p265 1 TRUE galK 2 6.40e+07 800 0.0000125 0.0000099
10ng pInt + p265 10 TRUE galK 1 5.80e+07 4300 0.0000741 0.0001103
10ng pInt + p265 10 TRUE galK 2 5.60e+07 8200 0.0001464 0.0001103
100ng pInt + p265 100 TRUE galK 1 2.50e+07 35000 0.0014000 0.0016583
100ng pInt + p265 100 TRUE galK 2 2.40e+07 46000 0.0019167 0.0016583
278ng pInt + p265 278 TRUE galK 1 2.10e+07 75000 0.0035714 0.0028151
278ng pInt + p265 278 TRUE galK 2 1.70e+07 35000 0.0020588 0.0028151
1ng pInt + p266 1 TRUE hisA 1 3.10e+07 940 0.0000303 0.0000311
1ng pInt + p266 1 TRUE hisA 2 2.60e+07 830 0.0000319 0.0000311
10ng pInt + p266 10 TRUE hisA 1 7.00e+07 22000 0.0003143 0.0003264
10ng pInt + p266 10 TRUE hisA 2 6.50e+07 22000 0.0003385 0.0003264
100ng pInt + p266 100 TRUE hisA 1 6.10e+07 97000 0.0015902 0.0015902
100ng pInt + p266 100 TRUE hisA 2 5.60e+07 NA NA 0.0015902
278ng pInt + p266 278 TRUE hisA 1 5.20e+07 180000 0.0034615 0.0054808
278ng pInt + p266 278 TRUE hisA 2 5.60e+07 420000 0.0075000 0.0054808

Note that the second hisA 100ng data point is NA because it came back as zero colonies on the plate, which was an obvious error.

# Plot individual replicates (open circles), condition means (filled points)
# and a line connecting the means of each locus.
plot_pint_conc <- ggplot(df_pint_conc, aes(x = pInt_ng, y = eff, color = locus)) +
  geom_path(
    # One row per condition, holding its average efficiency
    data = df_pint_conc %>%
      group_by(pInt_ng, TO_added, locus, avg_eff) %>%
      summarise(.groups = 'drop'),
    aes(x = pInt_ng, y = avg_eff, group = locus), size = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # avg_eff is repeated on every replicate row, so draw it once per condition
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", 'light gray'),
    labels = c('galK', 'hisA', 'control')
  ) +
  scale_x_log10() +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.0001)) +
  # The legend belongs to the color aesthetic (no fill is mapped), so the
  # title has to be set on `color`
  labs(x = 'Integrating plasmid concentration (ng) ', y = 'Efficiency', color = 'Locus')

plot_pint_conc

Fig. 2G - Arabinose Recovery Levels

This experiment tested the effect of arabinose (bxb-1 inducer for pHelper-Ec1-gentR) in a 1 hr recovery culture. Let’s read in the data:

# Read the arabinose level data, then calculate the efficiency of each
# replicate and the average efficiency for each arabinose level and locus.
df_ara <- read_csv('../../data/low_throughput_experiments/2022_06_21_arabinose_levels_data.csv') %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(arabinose_per, locus) %>%
  mutate(avg_eff = mean(eff)) %>%
  # Return a plain tibble so no grouping leaks into later steps
  ungroup()

df_ara %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')
condition arabinose_per TO locus replicate LB_count Kan_count eff avg_eff
0% arabinose 0.00 p265 galK 1 430000 370 0.0008605 0.0006352
0% arabinose 0.00 p265 galK 2 610000 250 0.0004098 0.0006352
0.01 % arabinose 0.01 p265 galK 1 680000 550 0.0008088 0.0008317
0.01 % arabinose 0.01 p265 galK 2 550000 470 0.0008545 0.0008317
0.1 % arabinose 0.10 p265 galK 1 440000 960 0.0021818 0.0018556
0.1 % arabinose 0.10 p265 galK 2 510000 780 0.0015294 0.0018556
1 % arabinose 1.00 p265 galK 1 550000 1490 0.0027091 0.0022013
1 % arabinose 1.00 p265 galK 2 620000 1050 0.0016935 0.0022013
0% arabinose 0.00 p266 hisA 1 440000 93 0.0002114 0.0002514
0% arabinose 0.00 p266 hisA 2 470000 137 0.0002915 0.0002514
0.01 % arabinose 0.01 p266 hisA 1 560000 390 0.0006964 0.0012019
0.01 % arabinose 0.01 p266 hisA 2 410000 700 0.0017073 0.0012019
0.1 % arabinose 0.10 p266 hisA 1 600000 790 0.0013167 0.0017833
0.1 % arabinose 0.10 p266 hisA 2 520000 1170 0.0022500 0.0017833
1 % arabinose 1.00 p266 hisA 1 540000 1260 0.0023333 0.0022768
1 % arabinose 1.00 p266 hisA 2 590000 1310 0.0022203 0.0022768
0% arabinose 0.00 pInt only pInt_only 1 500000 6 0.0000120 0.0000122
0% arabinose 0.00 pInt only pInt_only 2 560000 7 0.0000125 0.0000122
0.01 % arabinose 0.01 pInt only pInt_only 1 560000 7 0.0000125 0.0000138
0.01 % arabinose 0.01 pInt only pInt_only 2 530000 8 0.0000151 0.0000138
0.1 % arabinose 0.10 pInt only pInt_only 1 570000 8 0.0000140 0.0000144
0.1 % arabinose 0.10 pInt only pInt_only 2 610000 9 0.0000148 0.0000144
1 % arabinose 1.00 pInt only pInt_only 1 470000 20 0.0000426 0.0000391
1 % arabinose 1.00 pInt only pInt_only 2 450000 16 0.0000356 0.0000391

Now let’s plot:

# Plot individual replicates (open circles), condition means (filled points)
# and a line connecting the means of each locus. Arabinose levels are treated
# as discrete categories on the x axis.
plot_ara <- ggplot(df_ara, aes(x = factor(arabinose_per), y = eff, color = locus)) +
  geom_path(
    # One row per condition, holding its average efficiency
    data = df_ara %>%
      group_by(arabinose_per, locus, avg_eff) %>%
      summarise(.groups = 'drop'),
    aes(x = factor(arabinose_per), y = avg_eff, group = locus), size = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # avg_eff is repeated on every replicate row, so draw it once per condition
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", 'light gray'),
    labels = c('galK', 'hisA', 'control')
  ) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.001)) +
  # The legend belongs to the color aesthetic (no fill is mapped), so the
  # title has to be set on `color`
  labs(x = 'Arabinose %', y = 'Efficiency', color = 'Locus')

plot_ara

Fig. 2H - Recovery time

This experiment tested the effect of recovering in arabinose for different periods of time following the ORBIT transformation.

# Read the recovery time data, then calculate the efficiency of each
# replicate and the average efficiency for each recovery time and locus.
df_timing <- read_csv('../../data/low_throughput_experiments/2022_07_13_recovery_time_data.csv') %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(rec_time, locus) %>%
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  # Return a plain tibble so no grouping leaks into later steps
  ungroup()

df_timing %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')
condition rec_time locus replicate LB_count Kan_count eff avg_eff
p265 0hr 0.0 galK 1 58000 0 0.0000000 0.0000000
p265 0hr 0.0 galK 2 62000 0 0.0000000 0.0000000
p265 30min 0.5 galK 1 85000 46 0.0005412 0.0008004
p265 30min 0.5 galK 2 84000 89 0.0010595 0.0008004
p265 1hr 1.0 galK 1 69000 270 0.0039130 0.0043069
p265 1hr 1.0 galK 2 117000 550 0.0047009 0.0043069
p265 3hr 3.0 galK 1 3700000 1700 0.0004595 0.0005538
p265 3hr 3.0 galK 2 5400000 3500 0.0006481 0.0005538
p265 6hr 6.0 galK 1 6800000 2600 0.0003824 0.0004700
p265 6hr 6.0 galK 2 5200000 2900 0.0005577 0.0004700
p266 0hr 0.0 hisA 1 64000 0 0.0000000 0.0000000
p266 0hr 0.0 hisA 2 61000 0 0.0000000 0.0000000
p266 30min 0.5 hisA 1 320000 40 0.0001250 0.0001654
p266 30min 0.5 hisA 2 340000 70 0.0002059 0.0001654
p266 1hr 1.0 hisA 1 360000 590 0.0016389 0.0012694
p266 1hr 1.0 hisA 2 400000 360 0.0009000 0.0012694
p266 3hr 3.0 hisA 1 3200000 4600 0.0014375 0.0010187
p266 3hr 3.0 hisA 2 6000000 3600 0.0006000 0.0010187
p266 6hr 6.0 hisA 1 5500000 3700 0.0006727 0.0006248
p266 6hr 6.0 hisA 2 5200000 3000 0.0005769 0.0006248
pInt 0hr 0.0 pInt_only 1 56000 0 0.0000000 0.0000000
pInt 0hr 0.0 pInt_only 2 47000 0 0.0000000 0.0000000
pInt 30min 0.5 pInt_only 1 310000 1 0.0000032 0.0000035
pInt 30min 0.5 pInt_only 2 260000 1 0.0000038 0.0000035
pInt 1hr 1.0 pInt_only 1 370000 8 0.0000216 0.0000150
pInt 1hr 1.0 pInt_only 2 360000 3 0.0000083 0.0000150
pInt 3hr 3.0 pInt_only 1 4900000 21 0.0000043 0.0000048
pInt 3hr 3.0 pInt_only 2 4000000 21 0.0000052 0.0000048
pInt 6hr 6.0 pInt_only 1 6000000 10 0.0000017 0.0000018
pInt 6hr 6.0 pInt_only 2 4800000 9 0.0000019 0.0000018

As the table shows, no colonies were recovered at 0 hr in any of the conditions. These points are not visible on the log scale of the plot, but they were indeed measured. Let’s plot:

# Plot individual replicates (open circles), condition means (filled points)
# and a line connecting the means of each locus. Recovery times are treated
# as discrete categories on the x axis.
plot_timing <- ggplot(df_timing, aes(x = factor(rec_time), y = eff, color = locus)) +
  geom_path(
    # One row per condition, holding its average efficiency
    data = df_timing %>%
      group_by(rec_time, locus, avg_eff) %>%
      summarise(.groups = 'drop'),
    aes(x = factor(rec_time), y = avg_eff, group = locus), size = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # avg_eff is repeated on every replicate row, so draw it once per condition
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", 'light gray'),
    labels = c('galK', 'hisA', 'control')
  ) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.001)) +
  # Labels follow the sorted recovery times: 0, 0.5, 1, 3 and 6 hr
  scale_x_discrete(labels = c(  '0 min', '30 min', '1 hr', '3 hr', '6 hr')) +
  # The legend belongs to the color aesthetic (no fill is mapped), so the
  # title has to be set on `color`
  labs(x = 'Recovery time', y = 'Efficiency', color = 'Locus')

plot_timing

Create Fig. 2

# Switch the default theme to the figure theme before assembling the panels
theme_set(theme_figure())

# Panel B: induction condition plot stacked on top of its +/- legend
plot_cond_legend <- plot_grid(
  plot_cond, plot_cond_labs,
  ncol = 1,
  rel_heights = c(3, 1),
  rel_widths = c(1, 1),
  align = 'hv',
  axis = 'lr',
  scale = 0.9
)

# Panels C-H: two-column grid with the legends removed
fig_2_bottom <- plot_grid(
  plotlist = list(
    plot_to_len + guides(color = 'none', fill = 'none'),
    plot_lag + guides(shape = 'none'),
    plot_to_conc + guides(color = 'none', fill = 'none'),
    plot_pint_conc + guides(color = 'none'),
    plot_ara + guides(color = 'none'),
    plot_timing + guides(color = 'none')
  ),
  ncol = 2,
  rel_heights = c(1, 1),
  rel_widths = c(1, 1),
  align = 'hv',
  axis = 'lr',
  scale = 0.9,
  labels = c('C', 'D', 'E', 'F', 'G', 'H')
)

# Full figure: panel B above the C-H grid
fig_2 <- plot_grid(
  plot_cond_legend, fig_2_bottom,
  ncol = 1,
  rel_heights = c(1, 3),
  scale = 1.0,
  labels = c('B')
)


fig_2

# Export the assembled figure as a pdf
save_plot(
  "../../figures/r_pdf_figs/main_figs/fig_2_optimization.pdf",
  fig_2,
  base_width = 7,
  base_height = 7
)

# Print the R version, platform and package versions used for this notebook
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.3.4  cowplot_1.1.1     viridis_0.6.2     viridisLite_0.4.1
##  [5] knitr_1.41        forcats_0.5.2     stringr_1.5.0     dplyr_1.1.0      
##  [9] purrr_0.3.5       readr_2.1.3       tidyr_1.2.1       tibble_3.1.8     
## [13] ggplot2_3.4.0     tidyverse_1.3.2  
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4          sass_0.4.4          bit64_4.0.5        
##  [4] vroom_1.6.0         jsonlite_1.8.3      modelr_0.1.10      
##  [7] bslib_0.4.1         assertthat_0.2.1    highr_0.9          
## [10] googlesheets4_1.0.1 cellranger_1.1.0    yaml_2.3.6         
## [13] pillar_1.8.1        backports_1.4.1     glue_1.6.2         
## [16] digest_0.6.30       rvest_1.0.3         colorspace_2.0-3   
## [19] htmltools_0.5.4     pkgconfig_2.0.3     broom_1.0.1        
## [22] haven_2.5.1         scales_1.2.1        webshot_0.5.4      
## [25] svglite_2.1.0       tzdb_0.3.0          timechange_0.1.1   
## [28] googledrive_2.0.0   generics_0.1.3      farver_2.1.1       
## [31] ellipsis_0.3.2      cachem_1.0.6        withr_2.5.0        
## [34] cli_3.4.1           magrittr_2.0.3      crayon_1.5.2       
## [37] readxl_1.4.1        evaluate_0.18       fs_1.5.2           
## [40] fansi_1.0.3         xml2_1.3.3          textshaping_0.3.6  
## [43] tools_4.2.0         hms_1.1.2           gargle_1.2.1       
## [46] lifecycle_1.0.3     munsell_0.5.0       reprex_2.0.2       
## [49] compiler_4.2.0      jquerylib_0.1.4     systemfonts_1.0.4  
## [52] rlang_1.0.6         grid_4.2.0          rstudioapi_0.14    
## [55] labeling_0.4.2      rmarkdown_2.18      gtable_0.3.1       
## [58] DBI_1.1.3           R6_2.5.1            gridExtra_2.3      
## [61] lubridate_1.9.0     fastmap_1.1.0       bit_4.0.5          
## [64] utf8_1.2.2          ragg_1.2.5          stringi_1.7.8      
## [67] parallel_4.2.0      vctrs_0.5.2         dbplyr_2.2.1       
## [70] tidyselect_1.2.0    xfun_0.35