The raw sequencing data is available on the SRA and is associated with this BioProject.

| Library Name | Run ID and Link | Size |
|---|---|---|
| short_TF_oligos | SRR25076297 | 45 Mb |
| long_TF_oligos | SRR25076306 | 27 Mb |
| small_RNA_oligos | SRR25076305 | 42 Mb |

This notebook uses fastp to quality-filter and merge the raw paired-end reads from the TO abundance sequencing data, then converts the merged reads to FASTA.

# Quality-filter and merge the paired-end reads of each TO abundance library
# with fastp.
#
# For every library N in 1..3 this reads, from raw_data/:
#   twist-lib-N_SN_L001_R1_001.fastq.gz   read 1 (-i)
#   twist-lib-N_SN_L001_R2_001.fastq.gz   read 2 (-I)
# and writes, into fastp_processed/:
#   twist_lib_N_fastp.fastq.gz            merged reads (-m --merged_out)
#   lib_N_report.html                     HTML report (-h)
#   lib_N_report.json                     JSON report (-j)
#
# Without -j, fastp writes its JSON report to ./fastp.json, so every library
# overwrote the report of the previous one. A per-library path keeps all three.
source ~/.bash_profile

conda activate intel_env

seq_dir="../../../data/seq_data/twist_TO_abundance"
raw_dir="${seq_dir}/raw_data"
out_dir="${seq_dir}/fastp_processed"

# Make sure the output directory exists before fastp writes into it.
mkdir -p -- "${out_dir}" || exit 1

for lib in 1 2 3; do
  sample="twist-lib-${lib}_S${lib}_L001"

  # Stop at the first failure so a broken library is not silently skipped.
  fastp \
    -i "${raw_dir}/${sample}_R1_001.fastq.gz" \
    -I "${raw_dir}/${sample}_R2_001.fastq.gz" \
    -m --merged_out "${out_dir}/twist_lib_${lib}_fastp.fastq.gz" \
    -h "${out_dir}/lib_${lib}_report.html" \
    -j "${out_dir}/lib_${lib}_report.json" ||
    exit 1
done
## Read1 before filtering:
## total reads: 340923
## total bases: 60855248
## Q20 bases: 60402544(99.2561%)
## Q30 bases: 60157929(98.8541%)
## 
## Read2 before filtering:
## total reads: 340923
## total bases: 41739155
## Q20 bases: 40626868(97.3351%)
## Q30 bases: 40300096(96.5523%)
## 
## Merged and filtered:
## total reads: 329928
## total bases: 58892880
## Q20 bases: 58504579(99.3407%)
## Q30 bases: 58281296(98.9615%)
## 
## Filtering result:
## reads passed filter: 681056
## reads failed due to low quality: 790
## reads failed due to too many N: 0
## reads failed due to too short: 0
## reads with adapter trimmed: 66
## bases trimmed due to adapters: 2548
## reads corrected by overlap analysis: 8870
## bases corrected by overlap analysis: 18876
## 
## Duplication rate: 23.0319%
## 
## Insert size peak (evaluated by paired-end reads): 179
## 
## Read pairs merged: 329928
## % of original read pairs: 96.7749%
## % in reads after filtering: 100%
## 
## 
## JSON report: fastp.json
## HTML report: ../../../data/seq_data/twist_TO_abundance/fastp_processed/lib_1_report.html
## 
## fastp -i ../../../data/seq_data/twist_TO_abundance/raw_data/twist-lib-1_S1_L001_R1_001.fastq.gz -I ../../../data/seq_data/twist_TO_abundance/raw_data/twist-lib-1_S1_L001_R2_001.fastq.gz -m --merged_out ../../../data/seq_data/twist_TO_abundance/fastp_processed/twist_lib_1_fastp.fastq.gz -h ../../../data/seq_data/twist_TO_abundance/fastp_processed/lib_1_report.html 
## fastp v0.23.2, time used: 3 seconds
## Read1 before filtering:
## total reads: 206126
## total bases: 36769233
## Q20 bases: 36369840(98.9138%)
## Q30 bases: 36159049(98.3405%)
## 
## Read2 before filtering:
## total reads: 206126
## total bases: 25219399
## Q20 bases: 24909450(98.771%)
## Q30 bases: 24813946(98.3923%)
## 
## Merged and filtered:
## total reads: 202476
## total bases: 36117739
## Q20 bases: 35761154(99.0127%)
## Q30 bases: 35562548(98.4628%)
## 
## Filtering result:
## reads passed filter: 411916
## reads failed due to low quality: 336
## reads failed due to too many N: 0
## reads failed due to too short: 0
## reads with adapter trimmed: 50
## bases trimmed due to adapters: 1810
## reads corrected by overlap analysis: 5397
## bases corrected by overlap analysis: 10680
## 
## Duplication rate: 6.59742%
## 
## Insert size peak (evaluated by paired-end reads): 179
## 
## Read pairs merged: 202476
## % of original read pairs: 98.2292%
## % in reads after filtering: 100%
## 
## 
## JSON report: fastp.json
## HTML report: ../../../data/seq_data/twist_TO_abundance/fastp_processed/lib_2_report.html
## 
## fastp -i ../../../data/seq_data/twist_TO_abundance/raw_data/twist-lib-2_S2_L001_R1_001.fastq.gz -I ../../../data/seq_data/twist_TO_abundance/raw_data/twist-lib-2_S2_L001_R2_001.fastq.gz -m --merged_out ../../../data/seq_data/twist_TO_abundance/fastp_processed/twist_lib_2_fastp.fastq.gz -h ../../../data/seq_data/twist_TO_abundance/fastp_processed/lib_2_report.html 
## fastp v0.23.2, time used: 1 seconds
## Read1 before filtering:
## total reads: 309192
## total bases: 55180806
## Q20 bases: 54579629(98.9105%)
## Q30 bases: 54225149(98.2681%)
## 
## Read2 before filtering:
## total reads: 309192
## total bases: 37883633
## Q20 bases: 37195908(98.1846%)
## Q30 bases: 37009548(97.6927%)
## 
## Merged and filtered:
## total reads: 304789
## total bases: 54398241
## Q20 bases: 53848799(98.99%)
## Q30 bases: 53509178(98.3656%)
## 
## Filtering result:
## reads passed filter: 617938
## reads failed due to low quality: 446
## reads failed due to too many N: 0
## reads failed due to too short: 0
## reads with adapter trimmed: 48
## bases trimmed due to adapters: 1639
## reads corrected by overlap analysis: 8968
## bases corrected by overlap analysis: 15743
## 
## Duplication rate: 16.4374%
## 
## Insert size peak (evaluated by paired-end reads): 179
## 
## Read pairs merged: 304789
## % of original read pairs: 98.576%
## % in reads after filtering: 100%
## 
## 
## JSON report: fastp.json
## HTML report: ../../../data/seq_data/twist_TO_abundance/fastp_processed/lib_3_report.html
## 
## fastp -i ../../../data/seq_data/twist_TO_abundance/raw_data/twist-lib-3_S3_L001_R1_001.fastq.gz -I ../../../data/seq_data/twist_TO_abundance/raw_data/twist-lib-3_S3_L001_R2_001.fastq.gz -m --merged_out ../../../data/seq_data/twist_TO_abundance/fastp_processed/twist_lib_3_fastp.fastq.gz -h ../../../data/seq_data/twist_TO_abundance/fastp_processed/lib_3_report.html 
## fastp v0.23.2, time used: 2 seconds
# Decompress the merged fastp reads of each library and convert them to FASTA.
#
# For every library N in 1..3, inside fastp_processed/:
#   twist_lib_N_fastp.fastq.gz -> twist_lib_N_fastp.fastq -> lib_N_TOs.fasta
#
# Plain `gzip -d` refuses to overwrite an existing output file ("already
# exists -- skipping"), so on a re-run the conversion silently used the .fastq
# left behind by an earlier run instead of the freshly written .fastq.gz.
# -f forces the overwrite; as before, the .gz is removed after decompression.
source ~/.bash_profile

conda activate intel_env

out_dir="../../../data/seq_data/twist_TO_abundance/fastp_processed"

for lib in 1 2 3; do
  fastq="${out_dir}/twist_lib_${lib}_fastp.fastq"

  if [[ -e "${fastq}.gz" ]]; then
    gzip -df -- "${fastq}.gz" || exit 1
  elif [[ ! -e "${fastq}" ]]; then
    # Nothing to convert: neither the compressed nor the decompressed reads exist.
    printf 'error: neither %s.gz nor %s exists\n' "${fastq}" "${fastq}" >&2
    exit 1
  fi

  # NOTE(review): fastq_to_fasta appears to discard reads containing N unless
  # -n is given; confirm that is intended for abundance counting.
  fastq_to_fasta -i "${fastq}" -o "${out_dir}/lib_${lib}_TOs.fasta" || exit 1
done
## gzip: ../../../data/seq_data/twist_TO_abundance/fastp_processed/twist_lib_1_fastp.fastq already exists -- skipping
## gzip: ../../../data/seq_data/twist_TO_abundance/fastp_processed/twist_lib_2_fastp.fastq already exists -- skipping
## gzip: ../../../data/seq_data/twist_TO_abundance/fastp_processed/twist_lib_3_fastp.fastq already exists -- skipping
# Print the R version, platform, locale, and attached/loaded packages of the
# current R session so the environment can be reproduced.
sessionInfo()
## R version 4.2.0 (2022-04-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] digest_0.6.30   R6_2.5.1        lifecycle_1.0.3 jsonlite_1.8.3 
##  [5] magrittr_2.0.3  evaluate_0.18   stringi_1.7.8   cachem_1.0.6   
##  [9] rlang_1.1.1     cli_3.4.1       rstudioapi_0.14 jquerylib_0.1.4
## [13] bslib_0.4.1     vctrs_0.5.2     rmarkdown_2.18  tools_4.2.0    
## [17] stringr_1.5.0   glue_1.6.2      xfun_0.35       yaml_2.3.6     
## [21] fastmap_1.1.0   compiler_4.2.0  htmltools_0.5.4 knitr_1.41     
## [25] sass_0.4.4