This figure contains several data types from experiments that were used to benchmark ORBIT and compare it to the lambda Red method. Data figures were made in R notebooks and exported as PDFs. Cosmetic improvements were made in Adobe Illustrator. Note that Figures 3A & 3C were made entirely in Adobe Illustrator.
Setup packages and plotting for the notebook:
# Check packages
source("../tools/package_setup.R")
# Load packages
library(tidyverse)
library(cowplot)
library(kableExtra)
# Chunk rendering defaults: echo the code, hide messages and warnings,
# do not re-tidy the source, and center figures rendered at 2x resolution
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 60),
  tidy = FALSE,
  echo = TRUE,
  message = FALSE,
  warning = FALSE,
  fig.align = "center",
  fig.retina = 2
)
# Load plotting tools
source("../tools/plotting_tools.R")
#Modify the plot theme
theme_set(theme_notebook())

Read in the ORBIT data.
# Load the ORBIT colony counts and derive, for every row, the efficiency
# (Kan_count / LB_count) plus the mean efficiency of all rows sharing a locus.
# The result is left grouped by locus.
df_gold <- read_csv(
  "../../data/low_throughput_experiments/2022_08_31_gold_stds.csv"
) %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(locus) %>%
  mutate(avg_eff = mean(eff))
df_gold %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

| locus | replicate | LB_count | Kan_count | eff | avg_eff | 
|---|---|---|---|---|---|
| galK | 1 | 870000 | 870 | 0.0010000 | 0.0014561 | 
| galK | 2 | 1140000 | 1560 | 0.0013684 | 0.0014561 | 
| galK | 3 | 760000 | 1520 | 0.0020000 | 0.0014561 | 
| hisA | 1 | 970000 | 4100 | 0.0042268 | 0.0045716 | 
| hisA | 2 | 860000 | 2600 | 0.0030233 | 0.0045716 | 
| hisA | 3 | 990000 | 6400 | 0.0064646 | 0.0045716 | 
| metA | 1 | 900000 | 2300 | 0.0025556 | 0.0035410 | 
| metA | 2 | 600000 | 3300 | 0.0055000 | 0.0035410 | 
| metA | 3 | 740000 | 1900 | 0.0025676 | 0.0035410 | 
| leuD | 1 | 910000 | 350 | 0.0003846 | 0.0003861 | 
| leuD | 2 | 850000 | 310 | 0.0003647 | 0.0003861 | 
| leuD | 3 | 1100000 | 450 | 0.0004091 | 0.0003861 | 
| - | 1 | 960000 | 44 | 0.0000458 | 0.0000562 | 
| - | 2 | 800000 | 52 | 0.0000650 | 0.0000562 | 
| - | 3 | 950000 | 55 | 0.0000579 | 0.0000562 | 
Read in the lambda Red data.
# Load the lambda Red colony counts and derive, for every row, the efficiency
# (Kan_count / LB_count) plus the mean efficiency of all rows sharing a locus.
# The result is left grouped by locus.
df_lam <- read_csv(
  "../../data/low_throughput_experiments/2022_09_07_lambda_red_galK_AA_eff.csv"
) %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(locus) %>%
  mutate(avg_eff = mean(eff))
df_lam %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

| locus | replicate | LB_count | Kan_count | eff | avg_eff | 
|---|---|---|---|---|---|
| galK | 1 | 34000000 | 83 | 2.4e-06 | 2.8e-06 | 
| galK | 2 | 52000000 | 124 | 2.4e-06 | 2.8e-06 | 
| galK | 3 | 31000000 | 108 | 3.5e-06 | 2.8e-06 | 
| hisA | 1 | 32000000 | 143 | 4.5e-06 | 3.5e-06 | 
| hisA | 2 | 59000000 | 180 | 3.1e-06 | 3.5e-06 | 
| hisA | 3 | 48000000 | 140 | 2.9e-06 | 3.5e-06 | 
| metA | 1 | 18000000 | 96 | 5.3e-06 | 5.3e-06 | 
| metA | 2 | 10400000 | 72 | 6.9e-06 | 5.3e-06 | 
| metA | 3 | 20000000 | 73 | 3.7e-06 | 5.3e-06 | 
| leuD | 1 | 16400000 | 31 | 1.9e-06 | 1.2e-06 | 
| leuD | 2 | 66000000 | 49 | 7.0e-07 | 1.2e-06 | 
| leuD | 3 | 26000000 | 29 | 1.1e-06 | 1.2e-06 | 
| - | 1 | 21000000 | 8 | 4.0e-07 | 2.0e-07 | 
| - | 2 | 35000000 | 3 | 1.0e-07 | 2.0e-07 | 
| - | 3 | 27000000 | 5 | 2.0e-07 | 2.0e-07 | 
Next we combine the ORBIT and lambda Red data and calculate the ratio of efficiencies:
# Stack both datasets, tagging each row with the method it came from, then
# pin the ordering of loci and methods through explicit factor levels
df_lam_gold <- bind_rows(
  df_gold %>% mutate(exp = "ORBIT"),
  df_lam %>% mutate(exp = "lambda_red")
) %>%
  mutate(
    locus = factor(locus, levels = c("galK", "hisA", "metA", "leuD", "-"))
  ) %>%
  mutate(exp = factor(exp, levels = c("ORBIT", "lambda_red")))

# avg_eff is repeated on every replicate row, so keep only replicate 1,
# drop the per-replicate columns, spread the two methods into columns and
# divide them to get the ORBIT / lambda Red efficiency ratio
df_lam_gold %>%
  filter(replicate == 1) %>%
  select(-LB_count, -Kan_count, -eff) %>%
  pivot_wider(names_from = exp, values_from = avg_eff) %>%
  mutate(orbit_lam_ratio = ORBIT / lambda_red)
## # A tibble: 5 × 5
## # Groups:   locus [5]
##   locus replicate     ORBIT  lambda_red orbit_lam_ratio
##   <fct>     <dbl>     <dbl>       <dbl>           <dbl>
## 1 galK          1 0.00146   0.00000277             526.
## 2 hisA          1 0.00457   0.00000348            1314.
## 3 metA          1 0.00354   0.00000530             668.
## 4 leuD          1 0.000386  0.00000125             309.
## 5 -             1 0.0000562 0.000000217            259.
Then we can plot the data:
# Negative-control ("-" locus) mean efficiency for each method. avg_eff is
# the same on every replicate row of a locus, so the first value suffices.
orbit_bg <- df_lam_gold %>%
  filter(locus == "-", exp == "ORBIT") %>%
  pull(avg_eff) %>%
  first()
lam_bg <- df_lam_gold %>%
  filter(locus == "-", exp == "lambda_red") %>%
  pull(avg_eff) %>%
  first()

# Efficiency per locus and method: open circles are individual replicates,
# the filled dot and crossbar mark the replicate mean, and the gray X marks
# the negative-control level of the corresponding method.
plot_lam_gold <- ggplot(
  df_lam_gold %>%
    filter(locus != "-") %>%
    mutate(bg = ifelse(exp == "ORBIT", orbit_bg, lam_bg)),
  aes(x = exp, y = eff, color = locus)
) +
  # bg is constant within a method, so draw it once (replicate 1 only)
  geom_point(
    data = . %>% filter(replicate == 1),
    aes(y = bg), shape = 4, color = "light gray"
  ) +
  # linewidth replaces the `size` argument deprecated for lines in ggplot2 3.4.0
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.5, linewidth = 0.25
  ) +
  stat_summary(fun = "mean", geom = "point") +
  # Fixed seed so the jittered points land in the same place on every render
  geom_point(
    position = position_jitterdodge(
      dodge.width = 1, jitter.width = 1, seed = 1
    ),
    shape = 21
  ) +
  facet_grid(~locus) +
  scale_color_viridis_d() +
  scale_y_log10(
    labels = scales::label_percent(accuracy = 0.0001),
    breaks = c(0.000001, 0.0001, 0.01)
  ) +
  # Named labels stay correct even if the factor level order changes
  scale_x_discrete(labels = c(ORBIT = "ORBIT", lambda_red = "λRed")) +
  labs(y = "Efficiency", x = NULL) +
  guides(color = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
plot_lam_gold

These data are accuracy measurements, where recombinants were tested for growth on permissive and selective media.
# Load the accuracy counts. Accuracy is one minus the ratio of
# selective_colonies to permissive_colonies; avg_accuracy is its mean over
# all rows sharing a locus. Locus becomes a factor with a fixed order.
df_gold_acc <- read_csv(
  "../../data/low_throughput_experiments/2022_08_30_gold_std_2_accuracy.csv"
) %>%
  mutate(accuracy = 1 - (selective_colonies / permissive_colonies)) %>%
  group_by(locus) %>%
  mutate(avg_accuracy = mean(accuracy)) %>%
  mutate(locus = factor(locus, levels = c("galK", "hisA", "metA", "leuD")))
df_gold_acc %>% kable() %>% kable_styling() %>% scroll_box(height = '250px')

| locus | replicate | permissive_colonies | selective_colonies | accuracy | avg_accuracy | 
|---|---|---|---|---|---|
| galK | 1 | 14 | 0 | 1.0000000 | 1.0000000 | 
| galK | 2 | 14 | 0 | 1.0000000 | 1.0000000 | 
| galK | 3 | 14 | 0 | 1.0000000 | 1.0000000 | 
| hisA | 1 | 12 | 0 | 1.0000000 | 1.0000000 | 
| hisA | 2 | 14 | 0 | 1.0000000 | 1.0000000 | 
| hisA | 3 | 14 | 0 | 1.0000000 | 1.0000000 | 
| metA | 1 | 14 | 0 | 1.0000000 | 1.0000000 | 
| metA | 2 | 14 | 0 | 1.0000000 | 1.0000000 | 
| metA | 3 | 14 | 0 | 1.0000000 | 1.0000000 | 
| leuD | 1 | 14 | 0 | 1.0000000 | 0.9761905 | 
| leuD | 2 | 14 | 1 | 0.9285714 | 0.9761905 | 
| leuD | 3 | 14 | 0 | 1.0000000 | 0.9761905 | 
Now let’s plot the data:
# Accuracy per locus: open circles are individual replicates, the filled dot
# and crossbar mark the replicate mean.
plot_gold_acc <- ggplot(
  df_gold_acc,
  aes(x = locus, y = accuracy, color = locus)
) +
  # Dashed reference line: negative control accuracy was 0
  geom_hline(yintercept = 0, linetype = 2, color = "light gray") +
  # linewidth replaces the `size` argument deprecated for lines in ggplot2 3.4.0
  stat_summary(
    fun = "mean", geom = "crossbar", width = 0.5, linewidth = 0.25
  ) +
  stat_summary(fun = "mean", geom = "point") +
  # Horizontal jitter only, with a fixed seed so renders are reproducible
  geom_point(
    position = position_jitter(width = 0.1, height = 0, seed = 1),
    shape = 21
  ) +
  scale_y_continuous(labels = scales::label_percent()) +
  labs(y = "Accuracy", x = "Locus") +
  scale_color_viridis_d()

plot_gold_acc

# Switch the global theme to the figure theme before the panels are assembled
theme_set(theme_figure())
# Stack the efficiency plot (panel B) above the accuracy plot (panel D),
# with color legends suppressed on both.
fig_3_gold <- plot_grid(
  plot_lam_gold + guides(color = "none"),
  plot_gold_acc + guides(color = "none"),
  ncol = 1, rel_heights = c(1, 0.66),
  align = "hv", axis = "lr", scale = 0.9,
  labels = c("B", "D")
)

fig_3_gold

# Export the assembled panels as a PDF
save_plot(
  "../../figures/r_pdf_figs/main_figs/fig_3_gold_std.pdf",
  fig_3_gold,
  base_width = 3.5, base_height = 3.75
)

sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] kableExtra_1.3.4  cowplot_1.1.1     viridis_0.6.2     viridisLite_0.4.1
##  [5] knitr_1.41        forcats_0.5.2     stringr_1.5.0     dplyr_1.1.0      
##  [9] purrr_0.3.5       readr_2.1.3       tidyr_1.2.1       tibble_3.1.8     
## [13] ggplot2_3.4.0     tidyverse_1.3.2  
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4          sass_0.4.4          bit64_4.0.5        
##  [4] vroom_1.6.0         jsonlite_1.8.3      modelr_0.1.10      
##  [7] bslib_0.4.1         assertthat_0.2.1    highr_0.9          
## [10] googlesheets4_1.0.1 cellranger_1.1.0    yaml_2.3.6         
## [13] pillar_1.8.1        backports_1.4.1     glue_1.6.2         
## [16] digest_0.6.30       rvest_1.0.3         colorspace_2.0-3   
## [19] htmltools_0.5.4     pkgconfig_2.0.3     broom_1.0.1        
## [22] haven_2.5.1         scales_1.2.1        webshot_0.5.4      
## [25] svglite_2.1.0       tzdb_0.3.0          timechange_0.1.1   
## [28] googledrive_2.0.0   generics_0.1.3      farver_2.1.1       
## [31] ellipsis_0.3.2      cachem_1.0.6        withr_2.5.0        
## [34] cli_3.4.1           magrittr_2.0.3      crayon_1.5.2       
## [37] readxl_1.4.1        evaluate_0.18       fs_1.5.2           
## [40] fansi_1.0.3         xml2_1.3.3          textshaping_0.3.6  
## [43] tools_4.2.0         hms_1.1.2           gargle_1.2.1       
## [46] lifecycle_1.0.3     munsell_0.5.0       reprex_2.0.2       
## [49] compiler_4.2.0      jquerylib_0.1.4     systemfonts_1.0.4  
## [52] rlang_1.0.6         grid_4.2.0          rstudioapi_0.14    
## [55] labeling_0.4.2      rmarkdown_2.18      gtable_0.3.1       
## [58] DBI_1.1.3           R6_2.5.1            gridExtra_2.3      
## [61] lubridate_1.9.0     fastmap_1.1.0       bit_4.0.5          
## [64] utf8_1.2.2          ragg_1.2.5          stringi_1.7.8      
## [67] parallel_4.2.0      vctrs_0.5.2         dbplyr_2.2.1       
## [70] tidyselect_1.2.0    xfun_0.35