This figure contains several data types from experiments where ORBIT was used for large genomic deletions and integrations. Data figures were made in R notebooks and exported as PDFs. Cosmetic improvements were made in Adobe Illustrator. Note that Figures 4A, 4D & 4F were made in Adobe Illustrator.
Setup packages and plotting for the notebook:
# Check packages
source("../tools/package_setup.R")
# Load packages
library(tidyverse)
library(cowplot)
library(kableExtra)
# Code display options (applied to every chunk of the knitted notebook)
knitr::opts_chunk$set(
  tidy.opts = list(width.cutoff = 60), tidy = FALSE, echo = TRUE,
  message = FALSE, warning = FALSE, fig.align = "center", fig.retina = 2
)
# Load plotting tools
source("../tools/plotting_tools.R")
# Modify the plot theme
# NOTE(review): theme_notebook() is not defined in this file; presumably it
# comes from plotting_tools.R sourced above -- confirm.
theme_set(theme_notebook())
The galK deletions were done separately from the other loci, so let’s read that data in first.
# Read the galK deletion-size data and add, for every row, the efficiency
# (Kan_count / LB_count) and the mean efficiency of its del_size group.
# The result stays grouped by del_size.
df_gal_sizes <- read_csv("../../data/low_throughput_experiments/2022_11_09_galK_sizes_3_efficiency.csv") %>% # read in csv
  mutate(eff = Kan_count / LB_count) %>%
  group_by(del_size) %>%
  mutate(avg_eff = mean(eff)) # calculate efficiency and average efficiency for replicates
# calculate negative control value from galK experiment
# (rows with del_size == "-"; every row of a group carries the same avg_eff,
# so the first element is enough)
gal_sizes_pInt <- (df_gal_sizes %>% filter(del_size == "-"))$avg_eff[1]
# Show the table in a scrollable box
df_gal_sizes %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "250px")
locus | del_size | replicate | LB_count | Kan_count | eff | avg_eff |
---|---|---|---|---|---|---|
galK | 1122 | 1 | 13900000 | 33000 | 0.0023741 | 0.0026285 |
galK | 1122 | 2 | 11100000 | 32000 | 0.0028829 | 0.0026285 |
galK | 4258 | 1 | 10800000 | 1230 | 0.0001139 | 0.0001266 |
galK | 4258 | 2 | 9900000 | 1380 | 0.0001394 | 0.0001266 |
galK | 10762 | 1 | 9400000 | 430 | 0.0000457 | 0.0000592 |
galK | 10762 | 2 | 9500000 | 690 | 0.0000726 | 0.0000592 |
galK | 24027 | 1 | 10600000 | 990 | 0.0000934 | 0.0000920 |
galK | 24027 | 2 | 11700000 | 1060 | 0.0000906 | 0.0000920 |
galK | 49068 | 1 | 11400000 | 160 | 0.0000140 | 0.0000164 |
galK | 49068 | 2 | 15000000 | 280 | 0.0000187 | 0.0000164 |
galK | - | 1 | 11900000 | 240 | 0.0000202 | 0.0000243 |
galK | - | 2 | 8800000 | 250 | 0.0000284 | 0.0000243 |
Now we will read in the data for the other loci (hisA, metA, and leuD).
# Read the deletion-size data for the other loci and add, for every row, the
# efficiency (Kan_count / LB_count) and the mean efficiency of its
# (locus, del_size) group. The result stays grouped by locus and del_size.
df_aa_sizes <- read_csv("../../data/low_throughput_experiments/2022_09_07_AA_del_sizes_eff.csv") %>% # read in csv
  mutate(eff = Kan_count / LB_count) %>%
  group_by(locus, del_size) %>%
  mutate(avg_eff = mean(eff)) # calculate efficiency and average efficiency for replicates
# Show the table in a scrollable box
df_aa_sizes %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "250px")
locus | del_size | replicate | LB_count | Kan_count | eff | avg_eff |
---|---|---|---|---|---|---|
hisA | 100 | 1 | 370000 | 1120 | 0.0030270 | 0.0024635 |
hisA | 100 | 2 | 900000 | 1710 | 0.0019000 | 0.0024635 |
hisA | 700 | 1 | 800000 | 5100 | 0.0063750 | 0.0061420 |
hisA | 700 | 2 | 880000 | 5200 | 0.0059091 | 0.0061420 |
hisA | 7000 | 1 | 780000 | 100 | 0.0001282 | 0.0001195 |
hisA | 7000 | 2 | 830000 | 92 | 0.0001108 | 0.0001195 |
metA | 100 | 1 | 780000 | 8400 | 0.0107692 | 0.0102826 |
metA | 100 | 2 | 980000 | 9600 | 0.0097959 | 0.0102826 |
metA | 900 | 1 | 1250000 | 6000 | 0.0048000 | 0.0044886 |
metA | 900 | 2 | 790000 | 3300 | 0.0041772 | 0.0044886 |
metA | 13000 | 1 | 890000 | 1010 | 0.0011348 | 0.0010500 |
metA | 13000 | 2 | 860000 | 830 | 0.0009651 | 0.0010500 |
leuD | 100 | 1 | 880000 | 1370 | 0.0015568 | 0.0014955 |
leuD | 100 | 2 | 760000 | 1090 | 0.0014342 | 0.0014955 |
leuD | 600 | 1 | 1010000 | 720 | 0.0007129 | 0.0005181 |
leuD | 600 | 2 | 990000 | 320 | 0.0003232 | 0.0005181 |
leuD | 6000 | 1 | 760000 | 430 | 0.0005658 | 0.0005980 |
leuD | 6000 | 2 | 730000 | 460 | 0.0006301 | 0.0005980 |
galK | 1000 | 1 | 700000 | 3600 | 0.0051429 | 0.0039413 |
galK | 1000 | 2 | 730000 | 2000 | 0.0027397 | 0.0039413 |
# combine datasets
# - galK: drop the "-" control rows so del_size can be converted to numeric
# - other loci: drop the galK rows of that experiment so galK data come only
#   from df_gal_sizes
# locus becomes a factor with an explicit level order, which fixes the order
# of the colour scale in the plot.
df_gal_aa_sizes <- bind_rows(
  df_gal_sizes %>% filter(del_size != "-") %>% mutate(del_size = as.numeric(del_size)),
  df_aa_sizes %>% filter(locus != "galK")
) %>%
  mutate(locus = factor(locus, levels = c("galK", "hisA", "metA", "leuD")))
# Plot with individual datapoints, and mean points connected by lines
plot_gal_aa_sizes <- ggplot(df_gal_aa_sizes, aes(x = del_size, y = eff, color = locus)) +
  # dashed reference line at the galK negative-control efficiency
  geom_hline(yintercept = gal_sizes_pInt, linetype = 2, color = "light gray") +
  # individual replicates, jittered horizontally only
  geom_jitter(shape = 21, width = 0.05, height = 0, alpha = 0.4) +
  # avg_eff is repeated on every replicate row, so keep replicate 1 only to
  # draw each mean once
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  geom_line(data = . %>% filter(replicate == 1), aes(y = avg_eff)) +
  scale_y_log10(labels = scales::label_percent(accuracy = 0.01)) +
  scale_x_log10(breaks = c(100, 1000, 10000, 50000), labels = c("100 bp", "1 kb", "10 kb", "50 kb")) +
  labs(y = "Efficiency", x = "Deletion Size") +
  scale_color_viridis_d()
plot_gal_aa_sizes
Again, galK was done separately from the other loci, so let’s read in that data first:
# Read the galK colony counts and add, for every row, the accuracy
# (1 - selective_colonies / permissive_colonies) and the mean accuracy of its
# (locus, deletion_size) group. The result stays grouped.
df_galK_pin <- read_csv("../../data/low_throughput_experiments/2022_11_09_galK_sizes_3_accuracy.csv") %>%
  mutate(accuracy = 1 - (selective_colonies / permissive_colonies)) %>%
  group_by(locus, deletion_size) %>%
  mutate(avg_accuracy = mean(accuracy))
df_galK_pin
## # A tibble: 12 × 9
## # Groups: locus, deletion_size [6]
## condition locus deletion_size pInt_…¹ repli…² permi…³ selec…⁴ accur…⁵ avg_a…⁶
## <chr> <chr> <chr> <lgl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 galK_1kb galK 1122 FALSE 1 8 0 1 1
## 2 galK_1kb galK 1122 FALSE 2 8 0 1 1
## 3 galK_4kb galK 4258 FALSE 1 8 0 1 0.938
## 4 galK_4kb galK 4258 FALSE 2 8 1 0.875 0.938
## 5 galK_10kb galK 10762 FALSE 1 8 1 0.875 0.875
## 6 galK_10kb galK 10762 FALSE 2 8 1 0.875 0.875
## 7 galK_24kb galK 24027 FALSE 1 8 0 1 1
## 8 galK_24kb galK 24027 FALSE 2 8 0 1 1
## 9 galK_49kb galK 49068 FALSE 1 8 1 0.875 0.875
## 10 galK_49kb galK 49068 FALSE 2 8 1 0.875 0.875
## 11 pInt_only galK - TRUE 1 8 8 0 0.0625
## 12 pInt_only galK - TRUE 2 8 7 0.125 0.0625
## # … with abbreviated variable names ¹pInt_only, ²replicate,
## # ³permissive_colonies, ⁴selective_colonies, ⁵accuracy, ⁶avg_accuracy
Then we will read in the data for hisA, metA and leuD:
# Read the colony counts for hisA, metA and leuD and add, for every row, the
# accuracy (1 - selective_colonies / permissive_colonies) and the mean
# accuracy of its (locus, deletion_size) group. locus is then turned into a
# factor with an explicit level order. The result stays grouped.
df_aa_pin <- read_csv("../../data/low_throughput_experiments/2022_09_07_AA_sizes_pin_plate_count.csv") %>%
  mutate(accuracy = 1 - (selective_colonies / permissive_colonies)) %>%
  group_by(locus, deletion_size) %>%
  mutate(avg_accuracy = mean(accuracy)) %>%
  mutate(locus = factor(locus, levels = c("hisA", "metA", "leuD")))
df_aa_pin
## # A tibble: 18 × 9
## # Groups: locus, deletion_size [9]
## condition locus delet…¹ pInt_…² repli…³ permi…⁴ selec…⁵ accur…⁶ avg_a…⁷
## <chr> <fct> <dbl> <lgl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 hisA 100 bp del hisA 100 FALSE 1 7 0 1 1
## 2 hisA 100 bp del hisA 100 FALSE 2 7 0 1 1
## 3 hisA 700 bp del hisA 700 FALSE 1 7 0 1 1
## 4 hisA 700 bp del hisA 700 FALSE 2 7 0 1 1
## 5 hisA 7000 bp d… hisA 7000 FALSE 1 7 0 1 0.929
## 6 hisA 7000 bp d… hisA 7000 FALSE 2 7 1 0.857 0.929
## 7 metA 100 bp del metA 100 FALSE 1 6 0 1 1
## 8 metA 100 bp del metA 100 FALSE 2 6 0 1 1
## 9 metA 900 bp del metA 900 FALSE 1 7 0 1 1
## 10 metA 900 bp del metA 900 FALSE 2 6 0 1 1
## 11 metA 13000 bp … metA 13000 FALSE 1 7 0 1 1
## 12 metA 13000 bp … metA 13000 FALSE 2 7 0 1 1
## 13 leuD 100 bp del leuD 100 FALSE 1 7 0 1 1
## 14 leuD 100 bp del leuD 100 FALSE 2 6 0 1 1
## 15 leuD 600 bp del leuD 600 FALSE 1 7 0 1 0.929
## 16 leuD 600 bp del leuD 600 FALSE 2 7 1 0.857 0.929
## 17 leuD 6000 bp d… leuD 6000 FALSE 1 7 0 1 0.929
## 18 leuD 6000 bp d… leuD 6000 FALSE 2 7 1 0.857 0.929
## # … with abbreviated variable names ¹deletion_size, ²pInt_only, ³replicate,
## # ⁴permissive_colonies, ⁵selective_colonies, ⁶accuracy, ⁷avg_accuracy
We will combine the data and plot:
# Combine data
# The pInt_only control rows (deletion_size "-") are dropped from the galK
# table so deletion_size can be converted to numeric.
# bind_rows() combines a character locus column (galK table) with a factor
# one (other loci), which yields a character column; plotted as-is the loci
# would be ordered alphabetically and metA / leuD would get different colours
# than in the efficiency plot. Re-apply the same level order used there.
# ungroup() first so locus is no longer a grouping variable when it is rewritten.
df_galK_aa_pin <- bind_rows(
  df_galK_pin %>% filter(condition != "pInt_only") %>% mutate(deletion_size = as.numeric(deletion_size)),
  df_aa_pin
) %>%
  ungroup() %>%
  mutate(locus = factor(locus, levels = c("galK", "hisA", "metA", "leuD")))
# Get background values
# (avg_accuracy is identical on both pInt_only replicate rows, so keeping
# replicate 1 yields a single value)
galK_bg <- (df_galK_pin %>% filter(condition == "pInt_only" & replicate == 1))$avg_accuracy
# Plot with individual datapoints, and mean points connected by lines
plot_del_accuracy <- ggplot(df_galK_aa_pin, aes(x = deletion_size, y = accuracy, color = locus)) +
  geom_hline(yintercept = 0, linetype = 2, color = "light gray") + # background accuracy for AA loci was 0.
  geom_hline(yintercept = galK_bg, linetype = 4, color = "light gray") + # background accuracy for galK is shown separately here.
  # avg_accuracy is repeated on every replicate row, so keep replicate 1 only
  # to draw each mean once
  geom_line(data = . %>% filter(replicate == 1), aes(y = avg_accuracy)) +
  # individual replicates, jittered horizontally only
  geom_jitter(shape = 21, width = 0.05, height = 0, alpha = 0.4) +
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_accuracy)) +
  scale_y_continuous(labels = scales::label_percent(accuracy = 1), limits = c(0, 1)) +
  scale_x_log10(breaks = c(100, 1000, 10000, 50000), labels = c("100 bp", "1 kb", "10 kb", "50 kb")) +
  labs(y = "Phenotypic accuracy", x = "Deletion Size") +
  scale_color_viridis_d()
plot_del_accuracy
For this experiment, different-sized fragments of the luxR-inducible violacein operon were cloned into an integrating plasmid. Efficiency was measured for each plasmid with the ∆galK TO. Let’s read in the data:
# Read the integrating-plasmid size data and add, for every row, the
# efficiency (Kan_count / LB_count) and the mean efficiency of its
# (condition, to, plasmid_size) group. The result stays grouped.
df_vio <- read_csv("../../data/low_throughput_experiments/2022_10_25_pInt_vio_sizes.csv") %>%
  mutate(eff = Kan_count / LB_count) %>%
  group_by(condition, to, plasmid_size) %>%
  mutate(avg_eff = mean(eff)) # calculate efficiency and average efficiency for replicates
# Show the table in a scrollable box
df_vio %>%
  kable() %>%
  kable_styling() %>%
  scroll_box(height = "250px")
condition | to | plasmid_size | replicate | LB_count | Kan_count | eff | avg_eff |
---|---|---|---|---|---|---|---|
p265 + pInt_kan | 265 | 1958 | 1 | 1140000 | 490 | 0.0004298 | 0.0004158 |
p265 + pInt_kan | 265 | 1958 | 2 | 1120000 | 450 | 0.0004018 | 0.0004158 |
p265 + pInt_vioA | 265 | 3628 | 1 | 1080000 | 590 | 0.0005463 | 0.0005694 |
p265 + pInt_vioA | 265 | 3628 | 2 | 1080000 | 640 | 0.0005926 | 0.0005694 |
p265 + pInt_vioAB | 265 | 6718 | 1 | 1120000 | 360 | 0.0003214 | 0.0002468 |
p265 + pInt_vioAB | 265 | 6718 | 2 | 1220000 | 210 | 0.0001721 | 0.0002468 |
p265 + pInt_vioABC | 265 | 8203 | 1 | 1210000 | 53 | 0.0000438 | 0.0000418 |
p265 + pInt_vioABC | 265 | 8203 | 2 | 1280000 | 51 | 0.0000398 | 0.0000418 |
pInt_vioABC only | control | 8203 | 1 | 1040000 | 2 | 0.0000019 | 0.0000019 |
p265 + pInt_luxR_vioAE | 265 | 10671 | 1 | 1290000 | 32 | 0.0000248 | 0.0000230 |
p265 + pInt_luxR_vioAE | 265 | 10671 | 2 | 1130000 | 24 | 0.0000212 | 0.0000230 |
pInt_luxR_vioAE only | control | 10671 | 1 | 1400000 | 0 | 0.0000000 | 0.0000000 |
Now let’s plot:
# Efficiency vs. integrating plasmid size. The main layers use the
# non-control rows; the control rows are added as a separate layer.
plot_vio <- ggplot(df_vio %>% filter(to != "control"), aes(x = plasmid_size, y = eff)) +
  # individual replicates, jittered horizontally only
  geom_jitter(shape = 21, width = 200, height = 0, color = "#440154FF", alpha = 0.4) +
  # negative control efficiencies (to == "control"), drawn as crosses without
  # jitter; a control with eff = 0 has no position on the log10 axis and is
  # therefore not drawn
  geom_jitter(data = df_vio %>% filter(to == "control"), shape = 4, width = 0, height = 0, color = "light gray") +
  # avg_eff is repeated on every replicate row, so keep replicate 1 only to
  # draw each mean once
  geom_point(data = . %>% filter(replicate == 1), aes(y = avg_eff), color = "#440154FF") +
  geom_line(data = . %>% filter(replicate == 1), aes(y = avg_eff, group = to), color = "#440154FF") +
  scale_y_log10(limits = c(NA, 0.001), labels = scales::label_percent(accuracy = 0.001)) +
  scale_x_continuous(breaks = c(2000, 6000, 10000), labels = c("2 kb", "6 kb", "10 kb")) +
  labs(x = "Integrating plasmid size", y = "Efficiency")
plot_vio
# Switch to the figure theme before assembling the panels.
# NOTE(review): theme_figure() is not defined in this file; presumably it
# comes from the sourced plotting tools -- confirm.
theme_set(theme_figure())
# Assemble panels B, C and E on a 2 x 2 grid with the colour legends hidden;
# the third cell is left empty (NULL).
# plot_grid() assigns labels by grid position, NULL cells included, so the
# empty cell needs its own "" entry for "E" to land on plot_vio.
fig_4_sizes <- plot_grid(
  plot_gal_aa_sizes + guides(color = "none"),
  plot_del_accuracy + guides(color = "none"),
  NULL,
  plot_vio,
  nrow = 2, ncol = 2,
  align = "hv", axis = "lr", scale = 1,
  labels = c("B", "C", "", "E")
)
fig_4_sizes
# Export the assembled figure as a PDF
save_plot("../../figures/r_pdf_figs/main_figs/fig_4_del_int_sizes.pdf", fig_4_sizes, base_width = 5, base_height = 4)
# Print the R version, platform and package versions used for this run
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] kableExtra_1.3.4 cowplot_1.1.1 viridis_0.6.2 viridisLite_0.4.1
## [5] knitr_1.41 forcats_0.5.2 stringr_1.5.0 dplyr_1.1.0
## [9] purrr_0.3.5 readr_2.1.3 tidyr_1.2.1 tibble_3.1.8
## [13] ggplot2_3.4.0 tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.4 sass_0.4.4 bit64_4.0.5
## [4] vroom_1.6.0 jsonlite_1.8.3 modelr_0.1.10
## [7] bslib_0.4.1 assertthat_0.2.1 highr_0.9
## [10] googlesheets4_1.0.1 cellranger_1.1.0 yaml_2.3.6
## [13] pillar_1.8.1 backports_1.4.1 glue_1.6.2
## [16] digest_0.6.30 rvest_1.0.3 colorspace_2.0-3
## [19] htmltools_0.5.4 pkgconfig_2.0.3 broom_1.0.1
## [22] haven_2.5.1 scales_1.2.1 webshot_0.5.4
## [25] svglite_2.1.0 tzdb_0.3.0 timechange_0.1.1
## [28] googledrive_2.0.0 generics_0.1.3 farver_2.1.1
## [31] ellipsis_0.3.2 cachem_1.0.6 withr_2.5.0
## [34] cli_3.4.1 magrittr_2.0.3 crayon_1.5.2
## [37] readxl_1.4.1 evaluate_0.18 fs_1.5.2
## [40] fansi_1.0.3 xml2_1.3.3 textshaping_0.3.6
## [43] tools_4.2.0 hms_1.1.2 gargle_1.2.1
## [46] lifecycle_1.0.3 munsell_0.5.0 reprex_2.0.2
## [49] compiler_4.2.0 jquerylib_0.1.4 systemfonts_1.0.4
## [52] rlang_1.0.6 grid_4.2.0 rstudioapi_0.14
## [55] labeling_0.4.2 rmarkdown_2.18 gtable_0.3.1
## [58] DBI_1.1.3 R6_2.5.1 gridExtra_2.3
## [61] lubridate_1.9.0 fastmap_1.1.0 bit_4.0.5
## [64] utf8_1.2.2 ragg_1.2.5 stringi_1.7.8
## [67] parallel_4.2.0 vctrs_0.5.2 dbplyr_2.2.1
## [70] tidyselect_1.2.0 xfun_0.35