Notes

This figure contains data from many different experiments that were used to optimize the protocol for ORBIT. Data figures were made in R notebooks and exported as pdfs. Cosmetic improvements were made in Adobe Illustrator. Note that Figure 2A is a diagram that was made in Adobe Illustrator.

Set up packages and plotting for the notebook:

# Check packages
source("../tools/package_setup.R")

# Load packages
library(tidyverse)
library(cowplot)
library(kableExtra)

# Default knitr chunk options for the whole notebook: show the code, keep it
# unformatted, silence messages and warnings, and center the figures
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 60),
  tidy = FALSE,
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  fig.align = "center",
  fig.retina = 2
)

# Project plotting helpers (presumably the source of theme_notebook() and
# theme_figure(), which are not defined in this notebook -- TODO confirm)
source("../tools/plotting_tools.R")

# Make the notebook theme the default for every plot below
theme_set(theme_notebook())

Fig. 2B - Helper plasmid induction conditions

This experiment used a ∆galK targeting oligo with a pInt_kanR integrating plasmid and tested different helper plasmid induction schemes. Let’s first read in the data.

# Helper plasmid induction data (one row per condition and replicate)
df_cond <- read_csv(
  "../../data/low_throughput_experiments/2021_07_01_tol_ara_params.csv"
)

# Show the raw data as a scrollable table
df_cond %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = '250px')

cond_name	cond_id	pre_tol	pre_ara	post_tol	post_ara	rep	LB	Kan	Kan_LB
uninduced - -	1	FALSE	FALSE	FALSE	FALSE	1	3000000	22	0.0000073
uninduced - -	1	FALSE	FALSE	FALSE	FALSE	2	3900000	13	0.0000033
uninduced - -	1	FALSE	FALSE	FALSE	FALSE	3	5000000	5	0.0000010
uninduced - -	1	FALSE	FALSE	FALSE	FALSE	NC	3800000	8	0.0000021
uninduced - +	2	FALSE	FALSE	FALSE	TRUE	1	3000000	20	0.0000067
uninduced - +	2	FALSE	FALSE	FALSE	TRUE	2	2600000	10	0.0000038
uninduced - +	2	FALSE	FALSE	FALSE	TRUE	3	2000000	16	0.0000080
uninduced - +	2	FALSE	FALSE	FALSE	TRUE	NC	2400000	15	0.0000063
uninduced + +	3	FALSE	FALSE	TRUE	TRUE	1	330000	4	0.0000121
uninduced + +	3	FALSE	FALSE	TRUE	TRUE	2	420000	5	0.0000119
uninduced + +	3	FALSE	FALSE	TRUE	TRUE	3	320000	6	0.0000187
uninduced + +	3	FALSE	FALSE	TRUE	TRUE	NC	250000	8	0.0000320
tol induced - -	4	TRUE	FALSE	FALSE	FALSE	1	3400000	3200	0.0009412
tol induced - -	4	TRUE	FALSE	FALSE	FALSE	2	3000000	2800	0.0009333
tol induced - -	4	TRUE	FALSE	FALSE	FALSE	3	1700000	1600	0.0009412
tol induced - -	4	TRUE	FALSE	FALSE	FALSE	NC	2100000	9	0.0000043
tol induced - +	5	TRUE	FALSE	FALSE	TRUE	1	1760000	8300	0.0047159
tol induced - +	5	TRUE	FALSE	FALSE	TRUE	2	2400000	7100	0.0029583
tol induced - +	5	TRUE	FALSE	FALSE	TRUE	3	2600000	5000	0.0019231
tol induced - +	5	TRUE	FALSE	FALSE	TRUE	NC	3400000	8	0.0000024
tol induced + +	6	TRUE	FALSE	TRUE	TRUE	1	210000	63	0.0003000
tol induced + +	6	TRUE	FALSE	TRUE	TRUE	2	110000	45	0.0004091
tol induced + +	6	TRUE	FALSE	TRUE	TRUE	3	260000	90	0.0003462
tol induced + +	6	TRUE	FALSE	TRUE	TRUE	NC	180000	9	0.0000500
tol + ara induced - -	7	TRUE	TRUE	FALSE	FALSE	1	2600000	2400	0.0009231
tol + ara induced - -	7	TRUE	TRUE	FALSE	FALSE	2	3100000	5600	0.0018065
tol + ara induced - -	7	TRUE	TRUE	FALSE	FALSE	3	2900000	3900	0.0013448
tol + ara induced - -	7	TRUE	TRUE	FALSE	FALSE	NC	1100000	33	0.0000300
tol + ara induced - +	8	TRUE	TRUE	FALSE	TRUE	1	890000	6400	0.0071910
tol + ara induced - +	8	TRUE	TRUE	FALSE	TRUE	2	1900000	10200	0.0053684
tol + ara induced - +	8	TRUE	TRUE	FALSE	TRUE	3	2000000	6600	0.0033000
tol + ara induced - +	8	TRUE	TRUE	FALSE	TRUE	NC	2400000	56	0.0000233
tol + ara induced + +	9	TRUE	TRUE	TRUE	TRUE	1	150000	150	0.0010000
tol + ara induced + +	9	TRUE	TRUE	TRUE	TRUE	2	140000	160	0.0011429
tol + ara induced + +	9	TRUE	TRUE	TRUE	TRUE	3	190000	90	0.0004737
tol + ara induced + +	9	TRUE	TRUE	TRUE	TRUE	NC	40000	6	0.0001500

Now let’s plot the data. Note that the labeling of each condition gets complicated, so we made a special legend with + / - for each possible induction.

# Fig. 2B main panel. Layers, drawn back to front:
#   1. negative controls (rep == 'NC') as light gray Xs
#   2. individual replicates as jittered open circles
#   3. the per-condition mean as a crossbar plus a solid point
# Note: ggplot2 applies the log10 y transformation before computing stats, so
# the 'mean' summaries are means of the log10 values (i.e. geometric means).
plot_cond <- ggplot(
  df_cond %>% filter(rep != 'NC'),
  aes(x = factor(cond_id), y = Kan_LB)
) +
  geom_point(
    data = df_cond %>% filter(rep == 'NC'),
    shape = 4, color = 'light gray'
  ) +
  geom_jitter(
    width = 0.1, height = 0, shape = 21, alpha = 0.4, color = "#440154FF"
  ) +
  # `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0
  stat_summary(
    fun = 'mean', geom = 'crossbar',
    width = 0.5, linewidth = 0.25, color = "#440154FF"
  ) +
  # No `width` here: the point geom does not use it and would only warn
  stat_summary(fun = 'mean', geom = 'point', color = "#440154FF") +
  scale_y_log10(
    labels = scales::label_percent(accuracy = 0.0001),
    breaks = c(0.000001, 0.0001, 0.01)
  ) +
  # Condition labels are suppressed; the +/- grid below serves as the x axis
  scale_x_discrete(labels = NULL) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = NULL, y = 'Efficiency')

# Legend panel: one row of + / - symbols per inducer, one column per condition.
# Replicate 1 is used to get exactly one row per condition.
plot_cond_labs <- df_cond %>%
  filter(rep == 1) %>%
  select(cond_id, pre_tol, pre_ara, post_tol, post_ara) %>%
  pivot_longer(
    cols = c('pre_tol', 'pre_ara', 'post_tol', 'post_ara'),
    names_to = 'inducer'
  ) %>%
  mutate(value = ifelse(value, '+', '-')) %>%
  ggplot(aes(x = factor(cond_id), y = inducer, label = value)) +
  geom_text() +
  scale_y_discrete(
    limits = c('post_ara', 'pre_ara', 'post_tol', 'pre_tol'),
    labels = c('post ara', 'pre ara', 'post tol', 'pre tol')
  ) +
  labs(x = NULL, y = NULL) +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank()
  )

# Stack the data panel over the +/- legend, aligned on left and right axes
plot_cond_legend <- plot_grid(
  plot_cond, plot_cond_labs,
  ncol = 1, rel_heights = c(4, 1), rel_widths = c(1, 1),
  align = 'hv', axis = 'lr', scale = 1.0
)
plot_cond_legend

Fig. 2C - Targeting oligo length

This experiment tested the effect of targeting oligo length. Identical oligos were used for the four different loci, but their homology arms varied in length. The total length of the oligo is reported here, which includes both homology arms and the 38 bp attB site. Let’s read in the data.

# Targeting oligo length data, with efficiency computed as Kan / LB
df_len <- read_csv(
  "../../data/low_throughput_experiments/2022_02_15_orbit_TO_len_data.csv"
)
df_len <- mutate(df_len, eff = Kan / LB)

# Show the data as a scrollable table
df_len %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = '250px')

targeting_oligo	gene	TO_len	replicate	LB	Kan	eff
269	galK	74	1	530000	190	0.0003585
269	galK	74	2	860000	150	0.0001744
269	galK	74	3	1120000	220	0.0001964
264	galK	90	1	970000	28000	0.0288660
264	galK	90	2	400000	7900	0.0197500
264	galK	90	3	930000	9200	0.0098925
270	galK	104	1	920000	25000	0.0271739
270	galK	104	2	940000	22000	0.0234043
270	galK	104	3	1090000	15000	0.0137615
265	galK	120	1	1140000	24000	0.0210526
265	galK	120	2	910000	23000	0.0252747
265	galK	120	3	950000	18000	0.0189474
271	hisA	74	1	810000	230	0.0002840
271	hisA	74	2	890000	150	0.0001685
271	hisA	74	3	1010000	210	0.0002079
272	hisA	90	1	640000	1800	0.0028125
272	hisA	90	2	500000	2400	0.0048000
272	hisA	90	3	820000	3000	0.0036585
273	hisA	104	1	750000	17000	0.0226667
273	hisA	104	2	1130000	24000	0.0212389
273	hisA	104	3	770000	20000	0.0259740
266	hisA	120	1	1900000	27000	0.0142105
266	hisA	120	2	960000	31000	0.0322917
266	hisA	120	3	810000	40000	0.0493827
274	metA	74	1	820000	80	0.0000976
274	metA	74	2	740000	230	0.0003108
274	metA	74	3	740000	19	0.0000257
275	metA	90	1	710000	800	0.0011268
275	metA	90	2	870000	850	0.0009770
275	metA	90	3	790000	580	0.0007342
276	metA	104	1	600000	1200	0.0020000
276	metA	104	2	790000	2500	0.0031646
276	metA	104	3	850000	3400	0.0040000
267	metA	120	1	840000	10700	0.0127381
267	metA	120	2	730000	12800	0.0175342
267	metA	120	3	790000	10200	0.0129114
277	leuD	74	1	1300000	76	0.0000585
277	leuD	74	2	950000	78	0.0000821
277	leuD	74	3	760000	64	0.0000842
278	leuD	90	1	810000	1300	0.0016049
278	leuD	90	2	940000	1260	0.0013404
278	leuD	90	3	750000	1270	0.0016933
279	leuD	104	1	740000	1700	0.0022973
279	leuD	104	2	720000	2200	0.0030556
279	leuD	104	3	1030000	1800	0.0017476
268	leuD	120	1	670000	2800	0.0041791
268	leuD	120	2	570000	1770	0.0031053
268	leuD	120	3	710000	1600	0.0022535
pInt_only	NA	NA	1	1090000	300	0.0002752
pInt_only	NA	NA	2	1190000	250	0.0002101
pInt_only	NA	NA	3	1090000	130	0.0001193

Let’s plot the data.

# Calculate condition means and standard deviations for the experimental
# (non-control) samples; the result is returned ungrouped
df_len_summary <- df_len %>%
  filter(targeting_oligo != 'pInt_only') %>%
  group_by(gene, TO_len) %>%
  summarise(mean = mean(eff), sd = sd(eff), .groups = 'drop')

# Negative control value: mean efficiency of the pInt-only samples
df_len_control <- df_len %>% filter(targeting_oligo == 'pInt_only')
to_len_pInt <- mean(df_len_control$eff)

# Plot individual replicates, mean points, and connecting lines.
# The control rows have no gene or oligo length (both NA), so they are
# filtered out explicitly instead of being dropped by ggplot with a
# missing-value warning; the control is shown as the dashed line instead.
plot_to_len <- ggplot(
  df_len %>% filter(targeting_oligo != 'pInt_only'),
  aes(x = TO_len, y = eff, color = gene)
) +
  geom_hline(yintercept = to_len_pInt, color = 'gray', linetype = 'dashed') +
  geom_jitter(shape = 21, alpha = 0.4, width = 1, height = 0) +
  # Mean points get a small horizontal jitter (presumably so that loci with
  # near-identical means do not hide each other)
  geom_point(
    data = df_len_summary,
    aes(y = mean),
    position = position_jitter(height = 0, width = 0.1)
  ) +
  geom_line(data = df_len_summary, aes(y = mean)) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.01)) +
  scale_x_continuous(breaks = c(74, 90, 104, 120)) +
  scale_colour_viridis_d(limits = c('galK', 'hisA', 'metA', 'leuD')) +
  scale_fill_viridis_d(limits = c('galK', 'hisA', 'metA', 'leuD')) +
  labs(x = "Targeting oligo length (nt)", y = "Efficiency", color = NULL)

plot_to_len

Fig. 2D - Leading vs. Lagging strand TO

This experiment tested targeting oligos binding the leading or lagging strand at each locus. 120 nt TOs were used. Let’s read in the data:

# Read the leading / lagging strand data and derive efficiencies
df_lag <- read_csv('../../data/low_throughput_experiments/2022_03_07_leading_lagging_data.csv') %>%
  mutate(eff = Kan_count / LB_count) %>%
  # Average efficiency across the replicates of each locus / strand pair
  group_by(locus, strand) %>%
  mutate(avg_eff = mean(eff)) %>%
  # Drop the grouping before recoding the grouping columns, and so that the
  # returned data frame does not carry groups into later steps
  ungroup() %>%
  # Factor levels fix the display order of loci and strands
  mutate(
    locus = factor(locus, levels = c('galK', 'hisA', 'metA', 'leuD', 'pInt only')),
    strand = factor(strand, levels = c('leading', 'lagging', 'none'))
  )

df_lag %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

locus	strand	replicate	LB_count	Kan_count	eff	avg_eff
galK	lagging	1	23000000	130000	0.0056522	0.0061019
galK	lagging	2	29000000	190000	0.0065517	0.0061019
galK	leading	1	27000000	310	0.0000115	0.0000448
galK	leading	2	7300000	570	0.0000781	0.0000448
hisA	lagging	1	30000000	460000	0.0153333	0.0147037
hisA	lagging	2	27000000	380000	0.0140741	0.0147037
hisA	leading	1	27000000	670	0.0000248	0.0000322
hisA	leading	2	22000000	870	0.0000395	0.0000322
metA	lagging	1	1140000	13500	0.0118421	0.0093622
metA	lagging	2	1700000	11700	0.0068824	0.0093622
metA	leading	1	1700000	1260	0.0007412	0.0009649
metA	leading	2	1220000	1450	0.0011885	0.0009649
leuD	lagging	1	970000	3400	0.0035052	0.0036507
leuD	lagging	2	1080000	4100	0.0037963	0.0036507
leuD	leading	1	1040000	330	0.0003173	0.0002857
leuD	leading	2	1220000	310	0.0002541	0.0002857
pInt only	none	1	1300000	153	0.0001177	0.0000808
pInt only	none	2	2500000	110	0.0000440	0.0000808

Now let’s plot:

# Negative control value: average efficiency of the pInt-only samples
# (avg_eff is constant within the group, so the first row is enough)
lag_pInt <- (df_lag %>% filter(strand == 'none'))$avg_eff[1]

# Plot individual replicates, mean points and crossbars, and the negative
# control as a dashed line, with one facet per locus.
# Note: ggplot2 applies the log10 y transformation before computing stats, so
# the 'mean' summaries are means of the log10 values (i.e. geometric means).
plot_lag <- ggplot(
  df_lag %>% filter(strand != 'none'),
  aes(x = strand, y = eff, color = locus)
) +
  geom_hline(yintercept = lag_pInt, color = 'light gray', linetype = 2) +
  geom_point(
    position = position_dodge(width = 1),
    alpha = 0.4, fill = NA, shape = 21
  ) +
  # `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0
  stat_summary(
    fun = 'mean', geom = 'crossbar',
    position = position_dodge(width = 1), width = 0.5, linewidth = 0.25
  ) +
  # No `width` here: the point geom does not use it and would only warn
  stat_summary(
    fun = 'mean', geom = 'point',
    position = position_dodge(width = 1)
  ) +
  facet_grid(~locus) +
  scale_color_viridis_d() +
  scale_y_log10(
    labels = scales::label_percent(accuracy = 0.001),
    breaks = c(0.0001, 0.001, 0.01)
  ) +
  # Labels follow the factor level order: leading, lagging
  scale_x_discrete(labels = c('lead', 'lag')) +
  labs(y = 'Efficiency', x = 'Targeting oligo strand', color = NULL) +
  guides(color = 'none') +
  theme(panel.border = element_rect(color = 'black', fill = NA))

plot_lag

Fig. 2E - Targeting oligo concentration

This experiment tested the effect of TO concentration (final in 50 µL cell aliquots). Standard 120 nt TOs were used for each locus. Let’s read in the data:

# Read the targeting oligo concentration data and derive efficiencies
df_to_conc <- read_csv("../../data/low_throughput_experiments/2022_09_28_TO_conc_4_loci_data.csv") %>%
  mutate(eff = Kan_count / LB_count) %>%
  # Average efficiency across the replicates of each condition
  group_by(TO_conc, locus, condition) %>%
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  # Drop the grouping before recoding the grouping column, and so that the
  # returned data frame does not carry groups into later steps
  ungroup() %>%
  # Factor levels fix the display order of the loci
  mutate(locus = factor(locus, levels = c('galK', 'hisA', 'metA', 'leuD')))

df_to_conc %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

condition	TO_conc	locus	replicate	LB_count	Kan_count	eff	avg_eff
4uM galK	4000	galK	1	1300000	570	0.0004385	0.0003604
4uM galK	4000	galK	2	1700000	480	0.0002824	0.0003604
1uM galK	1000	galK	1	1700000	820	0.0004824	0.0004252
1uM galK	1000	galK	2	2500000	920	0.0003680	0.0004252
100nM galK	100	galK	1	2400000	900	0.0003750	0.0005486
100nM galK	100	galK	2	1800000	1300	0.0007222	0.0005486
10nM galK	10	galK	1	1100000	560	0.0005091	0.0002962
10nM galK	10	galK	2	2400000	200	0.0000833	0.0002962
4uM hisA	4000	hisA	1	1700000	1230	0.0007235	0.0008618
4uM hisA	4000	hisA	2	1000000	1000	0.0010000	0.0008618
1uM hisA	1000	hisA	1	1610000	730	0.0004534	0.0002737
1uM hisA	1000	hisA	2	1160000	109	0.0000940	0.0002737
100nM hisA	100	hisA	1	2400000	1600	0.0006667	0.0011333
100nM hisA	100	hisA	2	2500000	4000	0.0016000	0.0011333
10nM hisA	10	hisA	1	2500000	830	0.0003320	0.0007035
10nM hisA	10	hisA	2	1200000	1290	0.0010750	0.0007035
4uM metA	4000	metA	1	2000000	860	0.0004300	0.0004885
4uM metA	4000	metA	2	1700000	930	0.0005471	0.0004885
1uM metA	1000	metA	1	2400000	1800	0.0007500	0.0005950
1uM metA	1000	metA	2	1500000	660	0.0004400	0.0005950
100nM metA	100	metA	1	2200000	1830	0.0008318	0.0010945
100nM metA	100	metA	2	2800000	3800	0.0013571	0.0010945
10nM metA	10	metA	1	1100000	610	0.0005545	0.0007523
10nM metA	10	metA	2	1000000	950	0.0009500	0.0007523
4uM leuD	4000	leuD	1	1700000	330	0.0001941	0.0002037
4uM leuD	4000	leuD	2	1500000	320	0.0002133	0.0002037
1uM leuD	1000	leuD	1	1600000	140	0.0000875	0.0001009
1uM leuD	1000	leuD	2	1400000	160	0.0001143	0.0001009
100nM leuD	100	leuD	1	1500000	480	0.0003200	0.0002475
100nM leuD	100	leuD	2	1600000	280	0.0001750	0.0002475
10nM leuD	10	leuD	1	1800000	80	0.0000444	0.0000568
10nM leuD	10	leuD	2	2600000	180	0.0000692	0.0000568
pInt only	0	NA	1	2900000	30	0.0000103	0.0000172
pInt only	0	NA	2	1500000	36	0.0000240	0.0000172

Let’s plot the data:

# Negative control value: average efficiency of the 'pInt only' condition
to_conc_pInt <- df_to_conc %>%
  filter(condition == 'pInt only') %>%
  pull(avg_eff) %>%
  first()

# Individual observations (open, jittered), mean points and connecting lines;
# the control is drawn as a dashed horizontal line
plot_to_conc <- ggplot(
  df_to_conc %>% filter(TO_conc > 0),
  aes(x = TO_conc, y = avg_eff, color = locus)
) +
  geom_hline(yintercept = to_conc_pInt, linetype = 2, color = 'light gray') +
  geom_jitter(
    aes(y = eff),
    shape = 21, alpha = 0.4, width = 0.1, height = 0
  ) +
  geom_point() +
  geom_line() +
  scale_x_log10(
    breaks = c(10, 100, 1000, 4000),
    labels = c('10 nM', '100 nM', '1 µM', '4 µM')
  ) +
  scale_color_viridis_d() +
  scale_y_continuous(
    labels = scales::label_percent(),
    trans = 'log10',
    breaks = c(0.001, 0.0001, 0.00001),
    limits = c(0.00001, NA)
  ) +
  labs(y = 'Efficiency', x = 'Targeting oligo concentration')

plot_to_conc

Fig. 2F - Integrating plasmid added

This experiment tested the effect of how much integrating plasmid (pInt_attP1_kanR) was added to the ORBIT transformation. Let’s read in the data:

# Read the integrating plasmid concentration data and derive efficiencies
df_pint_conc <- read_csv('../../data/low_throughput_experiments/2022_03_31_pInt_conc_galK_hisA_data.csv') %>%
  mutate(eff = Kan_count / LB_count) %>%
  # Average efficiency across the replicates of each condition, ignoring
  # missing observations
  group_by(pInt_ng, TO_added, locus) %>%
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  # Return an ungrouped data frame so groups do not leak into later steps
  ungroup()

df_pint_conc %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

condition	pInt_ng	TO_added	locus	replicate	LB_count	Kan_count	eff	avg_eff
1ng pInt	1	FALSE	pInt_only	1	1.02e+08	7	0.0000001	0.0000001
1ng pInt	1	FALSE	pInt_only	2	7.00e+07	8	0.0000001	0.0000001
10ng pInt	10	FALSE	pInt_only	1	6.60e+07	20	0.0000003	0.0000003
10ng pInt	10	FALSE	pInt_only	2	7.90e+07	20	0.0000003	0.0000003
100ng pInt	100	FALSE	pInt_only	1	6.80e+07	290	0.0000043	0.0000033
100ng pInt	100	FALSE	pInt_only	2	7.20e+07	170	0.0000024	0.0000033
278ng pInt	278	FALSE	pInt_only	1	7.20e+07	280	0.0000039	0.0000061
278ng pInt	278	FALSE	pInt_only	2	6.20e+07	520	0.0000084	0.0000061
1ng pInt + p265	1	TRUE	galK	1	6.90e+07	510	0.0000074	0.0000099
1ng pInt + p265	1	TRUE	galK	2	6.40e+07	800	0.0000125	0.0000099
10ng pInt + p265	10	TRUE	galK	1	5.80e+07	4300	0.0000741	0.0001103
10ng pInt + p265	10	TRUE	galK	2	5.60e+07	8200	0.0001464	0.0001103
100ng pInt + p265	100	TRUE	galK	1	2.50e+07	35000	0.0014000	0.0016583
100ng pInt + p265	100	TRUE	galK	2	2.40e+07	46000	0.0019167	0.0016583
278ng pInt + p265	278	TRUE	galK	1	2.10e+07	75000	0.0035714	0.0028151
278ng pInt + p265	278	TRUE	galK	2	1.70e+07	35000	0.0020588	0.0028151
1ng pInt + p266	1	TRUE	hisA	1	3.10e+07	940	0.0000303	0.0000311
1ng pInt + p266	1	TRUE	hisA	2	2.60e+07	830	0.0000319	0.0000311
10ng pInt + p266	10	TRUE	hisA	1	7.00e+07	22000	0.0003143	0.0003264
10ng pInt + p266	10	TRUE	hisA	2	6.50e+07	22000	0.0003385	0.0003264
100ng pInt + p266	100	TRUE	hisA	1	6.10e+07	97000	0.0015902	0.0015902
100ng pInt + p266	100	TRUE	hisA	2	5.60e+07	NA	NA	0.0015902
278ng pInt + p266	278	TRUE	hisA	1	5.20e+07	180000	0.0034615	0.0054808
278ng pInt + p266	278	TRUE	hisA	2	5.60e+07	420000	0.0075000	0.0054808

Note that the second hisA 100ng data point is NA because it came back as zero colonies on the plate, which was an obvious error.

# Plot individual replicates (open, jittered), mean points, and a line
# through the means for each locus
plot_pint_conc <- ggplot(df_pint_conc, aes(x = pInt_ng, y = eff, color = locus)) +
  # One row per condition, sorted by the grouping keys, so the path runs
  # through the means in order of increasing plasmid amount.
  # `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
  geom_path(
    data = df_pint_conc %>%
      group_by(pInt_ng, TO_added, locus, avg_eff) %>%
      summarise(.groups = 'drop'),
    aes(x = pInt_ng, y = avg_eff, group = locus),
    linewidth = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # avg_eff is repeated on every replicate row; draw it once per condition
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  # Colors and labels are matched to the sorted locus values
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", 'light gray'),
    labels = c('galK', 'hisA', 'control')
  ) +
  scale_x_log10() +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.0001)) +
  # The legend belongs to the color aesthetic (no fill is mapped)
  labs(x = 'Integrating plasmid concentration (ng) ', y = 'Efficiency', color = 'Locus')

plot_pint_conc

Fig. 2G - Arabinose Recovery Levels

This experiment tested the effect of arabinose (bxb-1 inducer for pHelper-Ec1-gentR) in a 1 hr recovery culture. Let’s read in the data:

# Read the arabinose level data and derive efficiencies
df_ara <- read_csv('../../data/low_throughput_experiments/2022_06_21_arabinose_levels_data.csv') %>%
  mutate(eff = Kan_count / LB_count) %>%
  # Average efficiency across the replicates of each condition
  group_by(arabinose_per, locus) %>%
  mutate(avg_eff = mean(eff)) %>%
  # Return an ungrouped data frame so groups do not leak into later steps
  ungroup()

df_ara %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

condition	arabinose_per	TO	locus	replicate	LB_count	Kan_count	eff	avg_eff
0% arabinose	0.00	p265	galK	1	430000	370	0.0008605	0.0006352
0% arabinose	0.00	p265	galK	2	610000	250	0.0004098	0.0006352
0.01 % arabinose	0.01	p265	galK	1	680000	550	0.0008088	0.0008317
0.01 % arabinose	0.01	p265	galK	2	550000	470	0.0008545	0.0008317
0.1 % arabinose	0.10	p265	galK	1	440000	960	0.0021818	0.0018556
0.1 % arabinose	0.10	p265	galK	2	510000	780	0.0015294	0.0018556
1 % arabinose	1.00	p265	galK	1	550000	1490	0.0027091	0.0022013
1 % arabinose	1.00	p265	galK	2	620000	1050	0.0016935	0.0022013
0% arabinose	0.00	p266	hisA	1	440000	93	0.0002114	0.0002514
0% arabinose	0.00	p266	hisA	2	470000	137	0.0002915	0.0002514
0.01 % arabinose	0.01	p266	hisA	1	560000	390	0.0006964	0.0012019
0.01 % arabinose	0.01	p266	hisA	2	410000	700	0.0017073	0.0012019
0.1 % arabinose	0.10	p266	hisA	1	600000	790	0.0013167	0.0017833
0.1 % arabinose	0.10	p266	hisA	2	520000	1170	0.0022500	0.0017833
1 % arabinose	1.00	p266	hisA	1	540000	1260	0.0023333	0.0022768
1 % arabinose	1.00	p266	hisA	2	590000	1310	0.0022203	0.0022768
0% arabinose	0.00	pInt only	pInt_only	1	500000	6	0.0000120	0.0000122
0% arabinose	0.00	pInt only	pInt_only	2	560000	7	0.0000125	0.0000122
0.01 % arabinose	0.01	pInt only	pInt_only	1	560000	7	0.0000125	0.0000138
0.01 % arabinose	0.01	pInt only	pInt_only	2	530000	8	0.0000151	0.0000138
0.1 % arabinose	0.10	pInt only	pInt_only	1	570000	8	0.0000140	0.0000144
0.1 % arabinose	0.10	pInt only	pInt_only	2	610000	9	0.0000148	0.0000144
1 % arabinose	1.00	pInt only	pInt_only	1	470000	20	0.0000426	0.0000391
1 % arabinose	1.00	pInt only	pInt_only	2	450000	16	0.0000356	0.0000391

Now let’s plot:

# Plot individual replicates (open, jittered), mean points, and a line
# through the means for each locus; arabinose level is treated as discrete
plot_ara <- ggplot(df_ara, aes(x = factor(arabinose_per), y = eff, color = locus)) +
  # One row per condition, sorted by the grouping keys, so the path runs
  # through the means in order of increasing arabinose.
  # `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
  geom_path(
    data = df_ara %>%
      group_by(arabinose_per, locus, avg_eff) %>%
      summarise(.groups = 'drop'),
    aes(x = factor(arabinose_per), y = avg_eff, group = locus),
    linewidth = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # avg_eff is repeated on every replicate row; draw it once per condition
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  # Colors and labels are matched to the sorted locus values
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", 'light gray'),
    labels = c('galK', 'hisA', 'control')
  ) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.001)) +
  # The legend belongs to the color aesthetic (no fill is mapped)
  labs(x = 'Arabinose %', y = 'Efficiency', color = 'Locus')

plot_ara

Fig. 2H - Recovery time

This experiment tested the effect of recovering in arabinose for different periods of time following the ORBIT transformation.

# Read the recovery time data and derive efficiencies
df_timing <- read_csv('../../data/low_throughput_experiments/2022_07_13_recovery_time_data.csv') %>%
  mutate(eff = Kan_count / LB_count) %>%
  # Average efficiency across the replicates of each condition, ignoring
  # missing observations
  group_by(rec_time, locus) %>%
  mutate(avg_eff = mean(eff, na.rm = TRUE)) %>%
  # Return an ungrouped data frame so groups do not leak into later steps
  ungroup()

df_timing %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

condition	rec_time	locus	replicate	LB_count	Kan_count	eff	avg_eff
p265 0hr	0.0	galK	1	58000	0	0.0000000	0.0000000
p265 0hr	0.0	galK	2	62000	0	0.0000000	0.0000000
p265 30min	0.5	galK	1	85000	46	0.0005412	0.0008004
p265 30min	0.5	galK	2	84000	89	0.0010595	0.0008004
p265 1hr	1.0	galK	1	69000	270	0.0039130	0.0043069
p265 1hr	1.0	galK	2	117000	550	0.0047009	0.0043069
p265 3hr	3.0	galK	1	3700000	1700	0.0004595	0.0005538
p265 3hr	3.0	galK	2	5400000	3500	0.0006481	0.0005538
p265 6hr	6.0	galK	1	6800000	2600	0.0003824	0.0004700
p265 6hr	6.0	galK	2	5200000	2900	0.0005577	0.0004700
p266 0hr	0.0	hisA	1	64000	0	0.0000000	0.0000000
p266 0hr	0.0	hisA	2	61000	0	0.0000000	0.0000000
p266 30min	0.5	hisA	1	320000	40	0.0001250	0.0001654
p266 30min	0.5	hisA	2	340000	70	0.0002059	0.0001654
p266 1hr	1.0	hisA	1	360000	590	0.0016389	0.0012694
p266 1hr	1.0	hisA	2	400000	360	0.0009000	0.0012694
p266 3hr	3.0	hisA	1	3200000	4600	0.0014375	0.0010187
p266 3hr	3.0	hisA	2	6000000	3600	0.0006000	0.0010187
p266 6hr	6.0	hisA	1	5500000	3700	0.0006727	0.0006248
p266 6hr	6.0	hisA	2	5200000	3000	0.0005769	0.0006248
pInt 0hr	0.0	pInt_only	1	56000	0	0.0000000	0.0000000
pInt 0hr	0.0	pInt_only	2	47000	0	0.0000000	0.0000000
pInt 30min	0.5	pInt_only	1	310000	1	0.0000032	0.0000035
pInt 30min	0.5	pInt_only	2	260000	1	0.0000038	0.0000035
pInt 1hr	1.0	pInt_only	1	370000	8	0.0000216	0.0000150
pInt 1hr	1.0	pInt_only	2	360000	3	0.0000083	0.0000150
pInt 3hr	3.0	pInt_only	1	4900000	21	0.0000043	0.0000048
pInt 3hr	3.0	pInt_only	2	4000000	21	0.0000052	0.0000048
pInt 6hr	6.0	pInt_only	1	6000000	10	0.0000017	0.0000018
pInt 6hr	6.0	pInt_only	2	4800000	9	0.0000019	0.0000018

As you can see from the table, no colonies were recovered at zero hours in any of the conditions. These points are not visible on the log scale of the plot, but they were indeed measured. Let’s plot:

# Plot individual replicates (open, jittered), mean points, and a line
# through the means for each locus; recovery time is treated as discrete
plot_timing <- ggplot(df_timing, aes(x = factor(rec_time), y = eff, color = locus)) +
  # One row per condition, sorted by the grouping keys, so the path runs
  # through the means in order of increasing recovery time.
  # `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0.
  geom_path(
    data = df_timing %>%
      group_by(rec_time, locus, avg_eff) %>%
      summarise(.groups = 'drop'),
    aes(x = factor(rec_time), y = avg_eff, group = locus),
    linewidth = 0.5
  ) +
  geom_jitter(shape = 21, width = 0.025, height = 0, alpha = 0.4) +
  # avg_eff is repeated on every replicate row; draw it once per condition
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  # Colors and labels are matched to the sorted locus values
  scale_color_manual(
    values = c("#440154FF", "#21908CFF", 'light gray'),
    labels = c('galK', 'hisA', 'control')
  ) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.001)) +
  # Labels follow the sorted rec_time values: 0, 0.5, 1, 3, 6 (hours)
  scale_x_discrete(labels = c(  '0 min', '30 min', '1 hr', '3 hr', '6 hr')) +
  # The legend belongs to the color aesthetic (no fill is mapped)
  labs(x = 'Recovery time', y = 'Efficiency', color = 'Locus')

plot_timing

Create Fig. 2

# Switch the default theme to the figure theme before assembling the panels
theme_set(theme_figure())

# Panel B: induction-condition data stacked on top of its +/- legend
plot_cond_legend <- plot_grid(
  plot_cond,
  plot_cond_labs,
  ncol = 1,
  rel_heights = c(3, 1),
  rel_widths = c(1, 1),
  align = 'hv',
  axis = 'lr',
  scale = 0.9
)

# Panels C-H in a two-column grid, with the per-panel legends removed
fig_2_bottom <- plot_grid(
  plot_to_len + guides(color = 'none', fill = 'none'),
  plot_lag + guides(shape = 'none'),
  plot_to_conc + guides(color = 'none', fill = 'none'),
  plot_pint_conc + guides(color = 'none'),
  plot_ara + guides(color = 'none'),
  plot_timing + guides(color = 'none'),
  ncol = 2,
  rel_heights = c(1, 1),
  rel_widths = c(1, 1),
  align = 'hv',
  axis = 'lr',
  scale = 0.9,
  labels = c('C', 'D', 'E', 'F', 'G', 'H')
)

# Full figure: panel B on top (1/4 of the height), panels C-H below (3/4)
fig_2 <- plot_grid(
  plot_cond_legend,
  fig_2_bottom,
  ncol = 1,
  rel_heights = c(1, 3),
  scale = 1.0,
  labels = c('B')
)

fig_2

# Export the assembled figure as a 7 x 7 pdf
save_plot(
  "../../figures/r_pdf_figs/main_figs/fig_2_optimization.pdf",
  fig_2,
  base_width = 7,
  base_height = 7
)

# Record the R and package versions used to build the figure
sessionInfo()

## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.3.4  cowplot_1.1.1     viridis_0.6.2     viridisLite_0.4.1
##  [5] knitr_1.41        forcats_0.5.2     stringr_1.5.0     dplyr_1.1.0      
##  [9] purrr_0.3.5       readr_2.1.3       tidyr_1.2.1       tibble_3.1.8     
## [13] ggplot2_3.4.0     tidyverse_1.3.2  
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4          sass_0.4.4          bit64_4.0.5        
##  [4] vroom_1.6.0         jsonlite_1.8.3      modelr_0.1.10      
##  [7] bslib_0.4.1         assertthat_0.2.1    highr_0.9          
## [10] googlesheets4_1.0.1 cellranger_1.1.0    yaml_2.3.6         
## [13] pillar_1.8.1        backports_1.4.1     glue_1.6.2         
## [16] digest_0.6.30       rvest_1.0.3         colorspace_2.0-3   
## [19] htmltools_0.5.4     pkgconfig_2.0.3     broom_1.0.1        
## [22] haven_2.5.1         scales_1.2.1        webshot_0.5.4      
## [25] svglite_2.1.0       tzdb_0.3.0          timechange_0.1.1   
## [28] googledrive_2.0.0   generics_0.1.3      farver_2.1.1       
## [31] ellipsis_0.3.2      cachem_1.0.6        withr_2.5.0        
## [34] cli_3.4.1           magrittr_2.0.3      crayon_1.5.2       
## [37] readxl_1.4.1        evaluate_0.18       fs_1.5.2           
## [40] fansi_1.0.3         xml2_1.3.3          textshaping_0.3.6  
## [43] tools_4.2.0         hms_1.1.2           gargle_1.2.1       
## [46] lifecycle_1.0.3     munsell_0.5.0       reprex_2.0.2       
## [49] compiler_4.2.0      jquerylib_0.1.4     systemfonts_1.0.4  
## [52] rlang_1.0.6         grid_4.2.0          rstudioapi_0.14    
## [55] labeling_0.4.2      rmarkdown_2.18      gtable_0.3.1       
## [58] DBI_1.1.3           R6_2.5.1            gridExtra_2.3      
## [61] lubridate_1.9.0     fastmap_1.1.0       bit_4.0.5          
## [64] utf8_1.2.2          ragg_1.2.5          stringi_1.7.8      
## [67] parallel_4.2.0      vctrs_0.5.2         dbplyr_2.2.1       
## [70] tidyselect_1.2.0    xfun_0.35

Figure 2: Protocol Optimization

E. coli ORBIT 2023

Scott H. Saunders