Notes

This figure contains several data types from experiments that were used to benchmark ORBIT and compare it to the lambda Red method. Data figures were made in R notebooks and exported as PDFs. Cosmetic improvements were made in Adobe Illustrator. Note that Figures 3A & 3C were made in Adobe Illustrator.

Setup packages and plotting for the notebook:

# Check packages by running the shared package setup script
source("../tools/package_setup.R")

# Attach the packages used in this notebook
library(tidyverse)
library(cowplot)
library(kableExtra)

# Display options applied to every code chunk when the notebook is knit
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 60),
  tidy = FALSE,
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  fig.align = "center",
  fig.retina = 2
)

# Load the shared plotting tools, then make the notebook theme the default
# (theme_notebook() is presumably defined in plotting_tools.R)
source("../tools/plotting_tools.R")
theme_set(theme_notebook())

Fig. 3B - ORBIT vs. Lambda Red efficiency

Read in the ORBIT data.

# Read the ORBIT counts, then compute:
#   eff     - Kan_count divided by LB_count for each row (replicate)
#   avg_eff - mean of eff over all rows sharing the same locus
# The result stays grouped by locus.
df_gold <- read_csv(
  '../../data/low_throughput_experiments/2022_08_31_gold_stds.csv'
) %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(locus) %>%
  mutate(avg_eff = mean(eff))

# Show the table in a scrollable box
df_gold %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = '250px')

locus	replicate	LB_count	Kan_count	eff	avg_eff
galK	1	870000	870	0.0010000	0.0014561
galK	2	1140000	1560	0.0013684	0.0014561
galK	3	760000	1520	0.0020000	0.0014561
hisA	1	970000	4100	0.0042268	0.0045716
hisA	2	860000	2600	0.0030233	0.0045716
hisA	3	990000	6400	0.0064646	0.0045716
metA	1	900000	2300	0.0025556	0.0035410
metA	2	600000	3300	0.0055000	0.0035410
metA	3	740000	1900	0.0025676	0.0035410
leuD	1	910000	350	0.0003846	0.0003861
leuD	2	850000	310	0.0003647	0.0003861
leuD	3	1100000	450	0.0004091	0.0003861
	1	960000	44	0.0000458	0.0000562
	2	800000	52	0.0000650	0.0000562
	3	950000	55	0.0000579	0.0000562

Read in the lambda Red data.

# Read the lambda Red counts, then compute:
#   eff     - Kan_count divided by LB_count for each row (replicate)
#   avg_eff - mean of eff over all rows sharing the same locus
# The result stays grouped by locus.
df_lam <- read_csv(
  '../../data/low_throughput_experiments/2022_09_07_lambda_red_galK_AA_eff.csv'
) %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(locus) %>%
  mutate(avg_eff = mean(eff))

# Show the table in a scrollable box
df_lam %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = '250px')

locus	replicate	LB_count	Kan_count	eff	avg_eff
galK	1	34000000	83	2.4e-06	2.8e-06
galK	2	52000000	124	2.4e-06	2.8e-06
galK	3	31000000	108	3.5e-06	2.8e-06
hisA	1	32000000	143	4.5e-06	3.5e-06
hisA	2	59000000	180	3.1e-06	3.5e-06
hisA	3	48000000	140	2.9e-06	3.5e-06
metA	1	18000000	96	5.3e-06	5.3e-06
metA	2	10400000	72	6.9e-06	5.3e-06
metA	3	20000000	73	3.7e-06	5.3e-06
leuD	1	16400000	31	1.9e-06	1.2e-06
leuD	2	66000000	49	7.0e-07	1.2e-06
leuD	3	26000000	29	1.1e-06	1.2e-06
	1	21000000	8	4.0e-07	2.0e-07
	2	35000000	3	1.0e-07	2.0e-07
	3	27000000	5	2.0e-07	2.0e-07

Next we combine the ORBIT and lambda Red data and calculate the ratio of efficiencies:

# Stack the two experiments, tagging each row with the method it came from,
# and fix the display order of loci and methods through factor levels.
df_lam_gold <- bind_rows(
  mutate(df_gold, exp = 'ORBIT'),
  mutate(df_lam, exp = 'lambda_red')
) %>%
  mutate(
    locus = factor(locus, levels = c('galK', 'hisA', 'metA', 'leuD', '-')),
    exp = factor(exp, levels = c('ORBIT', 'lambda_red'))
  )

# avg_eff is repeated on every replicate row, so keeping replicate 1 leaves
# one row per locus and method. Spread the methods into columns and take the
# ORBIT / lambda Red ratio of average efficiencies.
df_lam_gold %>%
  filter(replicate == 1) %>%
  select(-c(LB_count, Kan_count, eff)) %>%
  pivot_wider(names_from = exp, values_from = avg_eff) %>%
  mutate(orbit_lam_ratio = ORBIT / lambda_red)

## # A tibble: 5 × 5
## # Groups:   locus [5]
##   locus replicate     ORBIT  lambda_red orbit_lam_ratio
##   <fct>     <dbl>     <dbl>       <dbl>           <dbl>
## 1 galK          1 0.00146   0.00000277             526.
## 2 hisA          1 0.00457   0.00000348            1314.
## 3 metA          1 0.00354   0.00000530             668.
## 4 leuD          1 0.000386  0.00000125             309.
## 5 -             1 0.0000562 0.000000217            259.

Then we can plot the data:

# Negative-control values for ORBIT and lambda Red: the average efficiency of
# the '-' locus for each method (avg_eff is identical across its replicates,
# so the first value is taken).
orbit_bg <- (df_lam_gold %>% filter(locus == '-' & exp == 'ORBIT'))$avg_eff[1]
lam_bg <- (df_lam_gold %>% filter(locus == '-' & exp == 'lambda_red'))$avg_eff[1]

# Plot individual datapoints, mean dots and crossbars, and negative-control X's.
# The '-' locus is dropped from the plotted data and instead attached to every
# row as `bg`, the background level of the matching method.
plot_lam_gold <- ggplot(
  df_lam_gold %>%
    filter(locus != '-') %>%
    mutate(bg = ifelse(exp == 'ORBIT', orbit_bg, lam_bg)),
  aes(x = exp, y = eff, color = locus)
) +
  # One gray X per method and facet marks the negative-control level
  geom_point(
    data = . %>% filter(replicate == 1),
    aes(y = bg),
    shape = 4,
    color = 'light gray'
  ) +
  # `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0
  stat_summary(fun = 'mean', geom = 'crossbar', width = 0.5, linewidth = 0.25) +
  stat_summary(fun = 'mean', geom = 'point') +
  # Fixed seed so the jittered replicate positions are identical on every render
  geom_point(
    position = position_jitterdodge(
      dodge.width = 1,
      jitter.width = 1,
      seed = 1
    ),
    shape = 21
  ) +
  facet_grid(~locus) +
  scale_color_viridis_d() +
  scale_y_log10(
    labels = scales::label_percent(accuracy = 0.0001),
    breaks = c(0.000001, 0.0001, 0.01)
  ) +
  # Named labels stay attached to the right method even if the level order
  # of `exp` changes
  scale_x_discrete(labels = c(ORBIT = 'ORBIT', lambda_red = 'λRed')) +
  # No fill aesthetic is mapped in this plot, so no fill label is set
  labs(y = 'Efficiency', x = NULL) +
  guides(color = 'none') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

plot_lam_gold

Fig. 3D - ORBIT accuracy

These data are accuracy measurements where recombinants were tested for growth on permissive and selective media.

# Read the accuracy counts, then compute:
#   accuracy     - 1 minus selective_colonies / permissive_colonies per row
#   avg_accuracy - mean of accuracy over all rows sharing the same locus
# locus is converted to a factor to fix the plotting order. The result stays
# grouped by locus.
df_gold_acc <- read_csv(
  "../../data/low_throughput_experiments/2022_08_30_gold_std_2_accuracy.csv"
) %>%
  mutate(accuracy = 1 - selective_colonies / permissive_colonies) %>%
  group_by(locus) %>%
  mutate(
    avg_accuracy = mean(accuracy),
    locus = factor(locus, levels = c('galK', 'hisA', 'metA', 'leuD'))
  )

# Show the table in a scrollable box
df_gold_acc %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = '250px')

locus	replicate	permissive_colonies	selective_colonies	accuracy	avg_accuracy
galK	1	14	0	1.0000000	1.0000000
galK	2	14	0	1.0000000	1.0000000
galK	3	14	0	1.0000000	1.0000000
hisA	1	12	0	1.0000000	1.0000000
hisA	2	14	0	1.0000000	1.0000000
hisA	3	14	0	1.0000000	1.0000000
metA	1	14	0	1.0000000	1.0000000
metA	2	14	0	1.0000000	1.0000000
metA	3	14	0	1.0000000	1.0000000
leuD	1	14	0	1.0000000	0.9761905
leuD	2	14	1	0.9285714	0.9761905
leuD	3	14	0	1.0000000	0.9761905

Now let’s plot the data:

# Accuracy per locus: individual replicates as open circles, with the
# per-locus mean drawn as a dot and crossbar.
plot_gold_acc <- ggplot(df_gold_acc, aes(x = locus, y = accuracy, color = locus)) +
  # negative control accuracy was 0
  geom_hline(yintercept = 0, linetype = 2, color = 'light gray') +
  # `linewidth` replaces the line `size` argument deprecated in ggplot2 3.4.0
  stat_summary(fun = 'mean', geom = 'crossbar', width = 0.5, linewidth = 0.25) +
  stat_summary(fun = 'mean', geom = 'point') +
  # Horizontal jitter only; fixed seed so point positions are identical on
  # every render
  geom_point(
    position = position_jitter(width = 0.1, height = 0, seed = 1),
    shape = 21
  ) +
  scale_y_continuous(labels = scales::label_percent()) +
  labs(y = 'Accuracy', x = 'Locus') +
  scale_color_viridis_d()

plot_gold_acc

Create Fig. 3

# Switch the default theme to the figure theme before assembling the panels
theme_set(theme_figure())

# Stack the efficiency panel (labelled B) above the accuracy panel
# (labelled D) in a single column, with the color legends turned off.
fig_3_gold <- plot_grid(
  plot_lam_gold + guides(color = 'none'),
  plot_gold_acc + guides(color = 'none'),
  labels = c('B', 'D'),
  ncol = 1,
  rel_heights = c(1, 0.66),
  align = 'hv',
  axis = 'lr',
  scale = 0.9
)

fig_3_gold

# Write the assembled figure to PDF
save_plot(
  filename = "../../figures/r_pdf_figs/main_figs/fig_3_gold_std.pdf",
  plot = fig_3_gold,
  base_width = 3.5,
  base_height = 3.75
)

# Print the R version, platform and package versions used for this session
sessionInfo()

## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.3.4  cowplot_1.1.1     viridis_0.6.2     viridisLite_0.4.1
##  [5] knitr_1.41        forcats_0.5.2     stringr_1.5.0     dplyr_1.1.0      
##  [9] purrr_0.3.5       readr_2.1.3       tidyr_1.2.1       tibble_3.1.8     
## [13] ggplot2_3.4.0     tidyverse_1.3.2  
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4          sass_0.4.4          bit64_4.0.5        
##  [4] vroom_1.6.0         jsonlite_1.8.3      modelr_0.1.10      
##  [7] bslib_0.4.1         assertthat_0.2.1    highr_0.9          
## [10] googlesheets4_1.0.1 cellranger_1.1.0    yaml_2.3.6         
## [13] pillar_1.8.1        backports_1.4.1     glue_1.6.2         
## [16] digest_0.6.30       rvest_1.0.3         colorspace_2.0-3   
## [19] htmltools_0.5.4     pkgconfig_2.0.3     broom_1.0.1        
## [22] haven_2.5.1         scales_1.2.1        webshot_0.5.4      
## [25] svglite_2.1.0       tzdb_0.3.0          timechange_0.1.1   
## [28] googledrive_2.0.0   generics_0.1.3      farver_2.1.1       
## [31] ellipsis_0.3.2      cachem_1.0.6        withr_2.5.0        
## [34] cli_3.4.1           magrittr_2.0.3      crayon_1.5.2       
## [37] readxl_1.4.1        evaluate_0.18       fs_1.5.2           
## [40] fansi_1.0.3         xml2_1.3.3          textshaping_0.3.6  
## [43] tools_4.2.0         hms_1.1.2           gargle_1.2.1       
## [46] lifecycle_1.0.3     munsell_0.5.0       reprex_2.0.2       
## [49] compiler_4.2.0      jquerylib_0.1.4     systemfonts_1.0.4  
## [52] rlang_1.0.6         grid_4.2.0          rstudioapi_0.14    
## [55] labeling_0.4.2      rmarkdown_2.18      gtable_0.3.1       
## [58] DBI_1.1.3           R6_2.5.1            gridExtra_2.3      
## [61] lubridate_1.9.0     fastmap_1.1.0       bit_4.0.5          
## [64] utf8_1.2.2          ragg_1.2.5          stringi_1.7.8      
## [67] parallel_4.2.0      vctrs_0.5.2         dbplyr_2.2.1       
## [70] tidyselect_1.2.0    xfun_0.35

Figure 3: Gold Standards

E. coli ORBIT 2023

Scott H. Saunders

Notes

Fig. 3B - ORBIT vs. Lambda Red efficiency

Fig. 3D - ORBIT accuracy

Create Fig. 3