Notes

This figure contains data from many different experiments that were used to optimize the protocol for ORBIT.


Setup packages and plotting for the notebook:

# Check packages
source("../tools/package_setup.R")
# Load packages
library(tidyverse)
library(cowplot)
library(kableExtra)
library(ggrastr)
# Code display options
knitr::opts_chunk$set(tidy.opts=list(width.cutoff=60),tidy=FALSE, echo = TRUE, message=FALSE, warning=FALSE, fig.align="center", fig.retina = 2)
# Load plotting tools
source("../tools/plotting_tools.R")
#Modify the plot theme
theme_set(theme_figure())

df_read_nearest_down <- read_csv('../../data/seq_data/left_side_TO_libs/df_read_nearest_left.csv')
df_read_nearest_up <- read_csv('../../data/seq_data/right_side_TO_libs/df_read_nearest_right.csv')
df_read_nearest_up_down <- bind_rows(df_read_nearest_up %>% mutate(up_down = 'up'),df_read_nearest_down %>%  mutate(up_down = 'down')) %>% 
  mutate(off_target = ifelse(min_val >500, T, F)) %>% 
  mutate(perfect = ifelse(min_val == 0, T, F))

df_read_nearest_up_down
## # A tibble: 536,686 × 15
##    seqnames strand cigar qwidth   start    end width njunc   lib read_…¹ gene_id
##    <chr>    <chr>  <chr>  <dbl>   <dbl>  <dbl> <dbl> <dbl> <dbl>   <dbl> <chr>  
##  1 U00096.3 -      79M2S     81 2978946 2.98e6    79     0     0 2979024 lysR   
##  2 U00096.3 -      6S47M     53 2095079 2.10e6    47     0     0 2095125 hisA   
##  3 U00096.3 +      81M       81 4277913 4.28e6    81     0     0 4277913 soxR   
##  4 U00096.3 -      76M       76 3535810 3.54e6    76     0     0 3535885 ompR   
##  5 U00096.3 +      2S78M     80  317133 3.17e5    78     0     0  317133 ykgA   
##  6 U00096.3 -      51M       51 3059708 3.06e6    51     0     0 3059758 argP   
##  7 U00096.3 -      2S30M     32 2095096 2.10e6    30     0     0 2095125 hisA   
##  8 U00096.3 -      81M       81 3535805 3.54e6    81     0     0 3535885 ompR   
##  9 U00096.3 +      75M       75 4099474 4.10e6    75     0     0 4099474 rhaR   
## 10 U00096.3 +      34M2S     36  417811 4.18e5    34     0     0  417811 phoB   
## # … with 536,676 more rows, 4 more variables: min_val <dbl>, up_down <chr>,
## #   off_target <lgl>, perfect <lgl>, and abbreviated variable name
## #   ¹​read_start_pos
df_read_nearest_up_down_org <- df_read_nearest_up_down %>% 
  arrange(lib,gene_id, up_down, perfect, width ) %>% 
  group_by(lib,gene_id, up_down, perfect) %>% 
  mutate(read_num = row_number())

df_read_nearest_up_down_org
## # A tibble: 536,686 × 16
## # Groups:   lib, gene_id, up_down, perfect [1,485]
##    seqnames strand cigar qwidth  start    end width njunc   lib read_s…¹ gene_id
##    <chr>    <chr>  <chr>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl>    <dbl> <chr>  
##  1 U00096.3 -      22M       22 568000 568021    22     0     0   568021 acrR   
##  2 U00096.3 -      22M       22 568000 568021    22     0     0   568021 acrR   
##  3 U00096.3 -      22M       22 568000 568021    22     0     0   568021 acrR   
##  4 U00096.3 -      22M       22 568000 568021    22     0     0   568021 acrR   
##  5 U00096.3 -      22M       22 568000 568021    22     0     0   568021 acrR   
##  6 U00096.3 -      22M       22 568000 568021    22     0     0   568021 acrR   
##  7 U00096.3 -      22M       22 568000 568021    22     0     0   568021 acrR   
##  8 U00096.3 -      22M       22 568000 568021    22     0     0   568021 acrR   
##  9 U00096.3 -      22M       22 568000 568021    22     0     0   568021 acrR   
## 10 U00096.3 -      23M       23 508201 508223    23     0     0   508223 acrR   
## # … with 536,676 more rows, 5 more variables: min_val <dbl>, up_down <chr>,
## #   off_target <lgl>, perfect <lgl>, read_num <int>, and abbreviated variable
## #   name ¹​read_start_pos
df_TOs <- read_csv("../../data/seq_data/right_side_TO_libs/df_TOs.csv")
df_TOs
## # A tibble: 420 × 7
##      lib gene  left_oligo_pos right_oligo_pos oligo              product go_term
##    <dbl> <chr>          <dbl>           <dbl> <chr>              <chr>   <chr>  
##  1     1 alpA         2758649         2758836 CTCTCTGCAACCAAAGT… CP4-57… DNA bi…
##  2     1 argR         3384708         3385153 GCACAATAATGTTGTAT… DNA-bi… DNA bi…
##  3     1 ariR         1216374         1216615 AAACTTATACTTAATAA… putati… protei…
##  4     1 arsR         3648533         3648861 TCGAAGAGAGACACTAC… DNA-bi… transc…
##  5     1 asnC         3926565         3926998 TAAAATAGAATGAATCA… DNA-bi… protei…
##  6     1 bolA          454477          454769 GCGCCGACAGTTGCAAA… DNA-bi… protei…
##  7     1 caiF           34305           34675 ACGCCCGGTATCAACGA… DNA-bi… DNA-bi…
##  8     1 cedA         1813441         1813658 GCAAAACCGGGAGGTAT… cell d… DNA bi…
##  9     1 cspA         3720054         3720241 TCGCCGAAAGGCACACT… cold s… nuclei…
## 10     1 cspC         1907246         1907430 CACACTTCAGATCAGTG… stress… nuclei…
## # … with 410 more rows
df_genes <- read_delim("../../data/seq_data/All_instances_of_Genes_in_Escherichia_coli_K-12_substr._MG1655.txt") %>% 
  mutate(left_pos = `Left-End-Position`, right_pos = `Right-End-Position`) %>% 
  select(-`Left-End-Position`, -`Right-End-Position`) %>% 
  mutate(fwd = ifelse(Direction =='+', T, F))

df_genes_lib <- left_join(df_TOs %>% select(lib, 'gene_id'='gene'), df_genes, by = c('gene_id'='Genes'))

df_genes_lib
## # A tibble: 420 × 7
##      lib gene_id Direction Product                         left_…¹ right…² fwd  
##    <dbl> <chr>   <chr>     <chr>                             <dbl>   <dbl> <lgl>
##  1     1 alpA    +         CP4-57 prophage; DNA-binding t… 2758644 2758856 TRUE 
##  2     1 argR    +         DNA-binding transcriptional du… 3384703 3385173 TRUE 
##  3     1 ariR    +         putative two-component system … 1216369 1216635 TRUE 
##  4     1 arsR    +         DNA-binding transcriptional re… 3648528 3648881 TRUE 
##  5     1 asnC    -         DNA-binding transcriptional du… 3926545 3927003 FALSE
##  6     1 bolA    +         DNA-binding transcriptional du…  454472  454789 TRUE 
##  7     1 caiF    +         DNA-binding transcriptional ac…   34300   34695 TRUE 
##  8     1 cedA    -         cell division modulator         1813421 1813663 FALSE
##  9     1 cspA    +         cold shock protein CspA         3720049 3720261 TRUE 
## 10     1 cspC    -         stress protein, member of the … 1907226 1907435 FALSE
## # … with 410 more rows, and abbreviated variable names ¹​left_pos, ²​right_pos
df_poly <- df_genes_lib %>% 
 #filter(right_pos<20000) %>% 
 pivot_longer(c(left_pos, right_pos), values_to = 'x_pos', names_to = 'left_right') %>% 
 mutate( y_pos_bottom = 10,y_pos_top = 100, y_pos_center=31.623) %>% 
 pivot_longer(c(y_pos_bottom,y_pos_top, y_pos_center), values_to = 'y_pos', names_to = 'top_bottom') %>% 
 mutate(x_pos = ifelse(left_right == 'left_pos' & Direction == '-' & top_bottom !='y_pos_center', x_pos + 50, x_pos)) %>% 
 mutate(x_pos = ifelse(left_right == 'right_pos' & Direction == '+' & top_bottom !='y_pos_center', x_pos - 50, x_pos)) %>% 
 mutate(point_num = case_when(left_right == 'left_pos' & top_bottom == 'y_pos_bottom' ~ 1,
left_right == 'left_pos' & top_bottom == 'y_pos_center' ~ 2,
left_right == 'left_pos' & top_bottom == 'y_pos_top' ~ 3,
left_right == 'right_pos' & top_bottom == 'y_pos_top' ~ 4,
left_right == 'right_pos' & top_bottom == 'y_pos_center' ~ 5,
left_right == 'right_pos' & top_bottom == 'y_pos_bottom' ~ 6)) %>% 
 arrange(gene_id, point_num)

df_poly
## # A tibble: 2,520 × 10
##      lib gene_id      Direc…¹ Product fwd   left_…²  x_pos top_b…³ y_pos point…⁴
##    <dbl> <chr>        <chr>   <chr>   <lgl> <chr>    <dbl> <chr>   <dbl>   <dbl>
##  1     3 3'ETS-<i>le… -       small … FALSE left_p… 1.99e6 y_pos_…  10         1
##  2     3 3'ETS-<i>le… -       small … FALSE left_p… 1.99e6 y_pos_…  31.6       2
##  3     3 3'ETS-<i>le… -       small … FALSE left_p… 1.99e6 y_pos_… 100         3
##  4     3 3'ETS-<i>le… -       small … FALSE right_… 1.99e6 y_pos_… 100         4
##  5     3 3'ETS-<i>le… -       small … FALSE right_… 1.99e6 y_pos_…  31.6       5
##  6     3 3'ETS-<i>le… -       small … FALSE right_… 1.99e6 y_pos_…  10         6
##  7     3 C0293        +       small … TRUE  left_p… 1.20e6 y_pos_…  10         1
##  8     3 C0293        +       small … TRUE  left_p… 1.20e6 y_pos_…  31.6       2
##  9     3 C0293        +       small … TRUE  left_p… 1.20e6 y_pos_… 100         3
## 10     3 C0293        +       small … TRUE  right_… 1.20e6 y_pos_… 100         4
## # … with 2,510 more rows, and abbreviated variable names ¹​Direction,
## #   ²​left_right, ³​top_bottom, ⁴​point_num
 kb_label <- function(x){
 # from s to ns
 lab <- paste(x / 1000)
 }

plot_all <- ggplot()+
  geom_rect(data = df_TOs %>% select(lib, gene_id = gene, left_oligo_pos, right_oligo_pos) %>% filter( lib == 1),
                     aes(xmin = left_oligo_pos, xmax = right_oligo_pos, ymin = 0, ymax = 50), fill = 'light gray')+
  geom_segment(data = df_read_nearest_down %>% filter(lib==1) %>% filter(min_val <500),
               aes(x = start, xend = end, y = width, yend = width ),
               #position = position_jitter(height = 75),
               alpha = 0.01)+
  geom_segment(data = df_read_nearest_up%>% filter(lib==1) %>% filter(min_val <500),
             aes(x = start, xend = end, y = width, yend = width ),
             #position = position_jitter(height = 75),
             alpha = 0.01)+
  scale_x_continuous(labels = scales::label_comma())+
  scale_y_continuous(breaks = c(0,40,80), limits = c(0,100))+
  labs(x = "Genomic position (bp)", y = 'Mapped read lengths (bp)')+
  guides(fill = 'none')+
  facet_wrap(~gene_id, scales = 'free', ncol = 8,strip.position = 'top')

plot_all <- ggplot()+
  geom_polygon(data = df_poly %>% filter(gene_id %in% c('alpA','argR','arsR')), aes(x = x_pos, y = y_pos, group = gene_id),fill = 'light gray')+
  geom_segment(data = df_read_nearest_up_down_org%>% filter(gene_id %in% c('alpA','argR','arsR')) %>% filter(min_val <500),
             aes(x = start, xend = end, y = read_num, yend = read_num, color = perfect), alpha = 0.25)+
  labs(x = "Genomic position (Kb)", y = 'Mapped read lengths (bp)')+
   scale_x_continuous(labels = kb_label)+
  scale_y_log10()+
  guides(fill = 'none')+
  facet_wrap(~gene_id, scales = 'free_x', ncol = 8,strip.position = 'top')

Fig. S11 - IDT oPool

plot_lib_0 <- ggplot()+
  geom_polygon(data = df_poly %>% filter(lib == 0), aes(x = x_pos, y = y_pos, group = gene_id),fill = 'light gray')+
    rasterise(geom_segment(data = df_read_nearest_up_down_org%>% filter(lib == 0) %>% filter(min_val <500),
             aes(x = start, xend = end, y = read_num, yend = read_num, color = perfect), alpha = 0.25, linewidth = 0.5), dpi = 2000, dev = "cairo")+
  labs(x = "Genomic position (Kb)", y = 'Read_count')+
   scale_x_continuous(labels = kb_label,breaks = scales::extended_breaks(n=3))+
  scale_y_log10(limits =c(1, NA))+
  guides(fill = 'none')+
  facet_wrap(~gene_id, scales = 'free', ncol = 8,strip.position = 'top') + guides(color = 'none')

plot_lib_0

#save_plot("../../figures/r_pdf_figs/supp_figs/plot_lib_0.pdf",plot_lib_0, base_height = 4, base_width = 8)

Fig. S13 - Lib #1 - short TFs

plot_lib_1 <- ggplot()+
  geom_polygon(data = df_poly %>% filter(lib == 1), aes(x = x_pos, y = y_pos, group = gene_id),fill = 'light gray')+
  rasterise(geom_segment(data = df_read_nearest_up_down_org%>% filter(lib == 1) %>% filter(min_val <500),
             aes(x = start, xend = end, y = read_num, yend = read_num, color = perfect), alpha = 0.25, linewidth = 0.5), dpi = 2000, dev = "cairo")+
  labs(x = "Genomic position (Kb)", y = 'Read count')+
   scale_x_continuous(labels = kb_label,breaks = scales::extended_breaks(n=3))+
  scale_y_log10(limits =c(1, NA))+
  guides(fill = 'none')+
  facet_wrap(~gene_id, scales = 'free', ncol = 8,strip.position = 'top') + guides(color = 'none')

plot_lib_1

#save_plot("../../figures/r_pdf_figs/supp_figs/plot_lib_1.pdf",plot_lib_1, base_height = 10, base_width = 8)

Fig. S14 - Lib #2 - long TFs

Note this plot is so large, that the Rmd notebook seems to have trouble displaying it, however the pdf output works fine.

plot_lib_2 <- ggplot()+
  geom_polygon(data = df_poly %>% filter(lib == 2), aes(x = x_pos, y = y_pos, group = gene_id),fill = 'light gray')+
  rasterise(
  geom_segment(data = df_read_nearest_up_down_org%>% filter(lib == 2) %>% filter(min_val <500),
             aes(x = start, xend = end, y = read_num, yend = read_num, color = perfect), alpha = 0.25, linewidth = 0.5), 
  dpi = 2000, dev = "cairo")+
  labs(x = "Genomic position (Kb)", y = 'Read_count')+
  scale_x_continuous(labels = kb_label,breaks = scales::extended_breaks(n=3))+
  scale_y_log10(limits =c(1, NA))+
  guides(fill = 'none')+
  facet_wrap(~gene_id, scales = 'free', ncol = 8,strip.position = 'top') + guides(color = 'none')

#plot_lib_2

#save_plot("../../figures/r_pdf_figs/supp_figs/plot_lib_2.pdf",plot_lib_2, base_height = 20, base_width = 8)

Fig. S15 - Lib #3 - sRNAs

plot_lib_3 <- ggplot()+
  geom_polygon(data = df_poly %>% filter(lib == 3), aes(x = x_pos, y = y_pos, group = gene_id),fill = 'light gray')+
  rasterise(geom_segment(data = df_read_nearest_up_down_org%>% filter(lib == 3) %>% filter(min_val <500),
             aes(x = start, xend = end, y = read_num, yend = read_num, color = perfect), alpha = 0.25, linewidth = 0.5), dpi = 2000, dev = "cairo")+
  labs(x = "Genomic position (Kb)", y = 'Read_count')+
   scale_x_continuous(labels = kb_label,breaks = scales::extended_breaks(n=3))+
  scale_y_log10(limits =c(1, NA))+
  guides(fill = 'none')+
  facet_wrap(~gene_id, scales = 'free', ncol = 8,strip.position = 'top') + guides(color = 'none')

plot_lib_3

#save_plot("../../figures/r_pdf_figs/supp_figs/plot_lib_3.pdf",plot_lib_3, base_height = 10, base_width = 8)
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] ggrastr_1.0.1     kableExtra_1.3.4  cowplot_1.1.1     viridis_0.6.2    
##  [5] viridisLite_0.4.1 knitr_1.41        forcats_0.5.2     stringr_1.5.0    
##  [9] dplyr_1.1.0       purrr_0.3.5       readr_2.1.3       tidyr_1.2.1      
## [13] tibble_3.1.8      ggplot2_3.4.0     tidyverse_1.3.2  
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4          sass_0.4.4          bit64_4.0.5        
##  [4] vroom_1.6.0         jsonlite_1.8.3      modelr_0.1.10      
##  [7] bslib_0.4.1         assertthat_0.2.1    highr_0.9          
## [10] vipor_0.4.5         googlesheets4_1.0.1 cellranger_1.1.0   
## [13] yaml_2.3.6          pillar_1.8.1        backports_1.4.1    
## [16] glue_1.6.2          digest_0.6.30       rvest_1.0.3        
## [19] colorspace_2.0-3    htmltools_0.5.4     pkgconfig_2.0.3    
## [22] broom_1.0.1         haven_2.5.1         scales_1.2.1       
## [25] webshot_0.5.4       svglite_2.1.0       tzdb_0.3.0         
## [28] timechange_0.1.1    googledrive_2.0.0   farver_2.1.1       
## [31] generics_0.1.3      ellipsis_0.3.2      cachem_1.0.6       
## [34] withr_2.5.0         cli_3.4.1           magrittr_2.0.3     
## [37] crayon_1.5.2        readxl_1.4.1        evaluate_0.18      
## [40] fs_1.5.2            fansi_1.0.3         xml2_1.3.3         
## [43] beeswarm_0.4.0      Cairo_1.6-0         tools_4.2.0        
## [46] hms_1.1.2           gargle_1.2.1        lifecycle_1.0.3    
## [49] munsell_0.5.0       reprex_2.0.2        compiler_4.2.0     
## [52] jquerylib_0.1.4     systemfonts_1.0.4   rlang_1.1.1        
## [55] grid_4.2.0          rstudioapi_0.14     labeling_0.4.2     
## [58] rmarkdown_2.18      gtable_0.3.1        DBI_1.1.3          
## [61] R6_2.5.1            gridExtra_2.3       lubridate_1.9.0    
## [64] fastmap_1.1.0       bit_4.0.5           utf8_1.2.2         
## [67] stringi_1.7.8       ggbeeswarm_0.7.1    parallel_4.2.0     
## [70] vctrs_0.5.2         dbplyr_2.2.1        tidyselect_1.2.0   
## [73] xfun_0.35