Changes between Version 2 and Version 3 of SOPs/gzipping
- Timestamp: 11/15/13 12:22:47 (11 years ago)
Legend:
- Unmodified
- Added
- Removed
- Modified
-
SOPs/gzipping
v2  v3
2   2
3   3   # Untar from solexa_public, gunzip, and gzip \\
4       tar -xOzf /lab/solexa_public/LAB/RUN/QualityScore/Foo.1.tar.gz | gzip -f > fastq/My_sample_2.1.fq.gz \\
5       tar -xOzf /lab/solexa_public/LAB/RUN/QualityScore/Foo.2.tar.gz | gzip -f > fastq/My_sample_2.2.fq.gz \\
    4   tar -xOzf /lab/solexa_public/LAB/RUN/QualityScore/Foo.1.tar.gz | gzip -f > fastq/My_sample.1.fq.gz \\
    5   tar -xOzf /lab/solexa_public/LAB/RUN/QualityScore/Foo.2.tar.gz | gzip -f > fastq/My_sample.2.fq.gz \\
6   6
7   7   # Run FastQC \\
8       fastqc fastq/My_sample_2.1.fq.gz \\
9       fastqc fastq/My_sample_2.2.fq.gz \\
    8   fastqc fastq/My_sample.1.fq.gz \\
    9   fastqc fastq/My_sample.2.fq.gz \\
10  10
11  11  # Trim adapters \\
12      gunzip -c fastq/My_sample_2.1.fq.gz | fastx_clipper -v -z -l 30 -a GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG -o fastq/My_sample_2.noVec.1.fq.gz \\
13      gunzip -c fastq/My_sample_2.2.fq.gz | fastx_clipper -v -z -l 30 -a GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG -o fastq/My_sample_2.noVec.2.fq.gz \\
14      fastqc fastq/My_sample_2.noVec.1.fq.gz \\
15      fastqc fastq/My_sample_2.noVec.2.fq.gz \\
    12  gunzip -c fastq/My_sample.1.fq.gz | fastx_clipper -v -z -l 30 -a GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG -o fastq/My_sample.noVec.1.fq.gz \\
    13  gunzip -c fastq/My_sample.2.fq.gz | fastx_clipper -v -z -l 30 -a GATCGGAAGAGCACACGTCTGAACTCCAGTCACCGATGTATCTCGTATGCCGTCTTCTGCTTG -o fastq/My_sample.noVec.2.fq.gz \\
    14  fastqc fastq/My_sample.noVec.1.fq.gz \\
    15  fastqc fastq/My_sample.noVec.2.fq.gz \\
16  16
17  17  # Drop reads of low quality \\
18      gunzip -c fastq/My_sample_2.noVec.1.fq.gz | fastq_quality_filter -v -q 20 -p 75 -z -o fastq/My_sample_2.noVec.qgt20.1.fq.gz \\
19      gunzip -c fastq/My_sample_2.noVec.2.fq.gz | fastq_quality_filter -v -q 20 -p 75 -z -o fastq/My_sample_2.noVec.qgt20.2.fq.gz \\
    18  gunzip -c fastq/My_sample.noVec.1.fq.gz | fastq_quality_filter -v -q 20 -p 75 -z -o fastq/My_sample.noVec.qgt20.1.fq.gz \\
    19  gunzip -c fastq/My_sample.noVec.2.fq.gz | fastq_quality_filter -v -q 20 -p 75 -z -o fastq/My_sample.noVec.qgt20.2.fq.gz \\
20  20
21  21  # Trim low-quality bases at end of reads \\
22      gunzip -c fastq/My_sample_2.noVec.qgt20.1.fq.gz | fastq_quality_trimmer -v -t 20 -l 30 -z -o fastq/My_sample_2.noVec.qgt20.t.1.fq.gz \\
23      gunzip -c fastq/My_sample_2.noVec.qgt20.2.fq.gz | fastq_quality_trimmer -v -t 20 -l 30 -z -o fastq/My_sample_2.noVec.qgt20.t.2.fq.gz \\
    22  gunzip -c fastq/My_sample.noVec.qgt20.1.fq.gz | fastq_quality_trimmer -v -t 20 -l 30 -z -o fastq/My_sample.noVec.qgt20.t.1.fq.gz \\
    23  gunzip -c fastq/My_sample.noVec.qgt20.2.fq.gz | fastq_quality_trimmer -v -t 20 -l 30 -z -o fastq/My_sample.noVec.qgt20.t.2.fq.gz \\
24  24
25  25  # After trimming, get reads that are still paired (and output as regular fastq) \\
26      /nfs/BaRC_Public/BaRC_code/Perl/cmpfastq/cmpfastqgz.pl fastq/My_sample_2.noVec.qgt20.t.1.fq.gz fastq/My_sample_2.noVec.qgt20.t.2.fq.gz
    26  /nfs/BaRC_Public/BaRC_code/Perl/cmpfastq/cmpfastqgz.pl fastq/My_sample.noVec.qgt20.t.1.fq.gz fastq/My_sample.noVec.qgt20.t.2.fq.gz
27  27
28  28  # Map paired reads with bowtie \\
29      # THIS STEP CANNOT USE GZ FILES\\
30      bowtie -l 30 -n 1 -X 1000 --best -S /nfs/genomes/a.thaliana_TAIR_10/bowtie/tair10_SI -1 fastq/My_sample_2.noVec.qgt20.t.1.fq-common.out -2 fastq/My_sample_2.noVec.qgt20.t.2.fq-common.out mapped_reads/My_sample_2.30.1.sam
    29  # '''bowtie cannot use gzipped input files''' \\
    30  bowtie -l 30 -n 1 -X 1000 --best -S /nfs/genomes/GENOME/bowtie/INDEX -1 fastq/My_sample.noVec.qgt20.t.1.fq-common.out -2 fastq/My_sample.noVec.qgt20.t.2.fq-common.out mapped_reads/My_sample.30.1.sam
31  31
32  32  # Sort and index mapped reads \\
33      /nfs/BaRC_Public/BaRC_code/Perl/SAM_to_BAM_sort_index/SAM_to_BAM_sort_index.pl mapped_reads/My_sample_2.30.1.sam \\
    33  /nfs/BaRC_Public/BaRC_code/Perl/SAM_to_BAM_sort_index/SAM_to_BAM_sort_index.pl mapped_reads/My_sample.30.1.sam \\
34  34  # Delete SAM file (since BAM file has all the same info) \\
35      rm -f mapped_reads/My_sample_2.30.1.sam \\
    35  rm -f mapped_reads/My_sample.30.1.sam \\
36  36
37  37  # Get summary counts \\
38      zcat fastq/My_sample_2.1.fq.gz | echo $((`wc -l`/4)) \\
39      zcat fastq/My_sample_2.noVec.1.fq.gz | echo $((`wc -l`/4)) \\
40      zcat fastq/My_sample_2.noVec.2.fq.gz | echo $((`wc -l`/4)) \\
41      zcat fastq/My_sample_2.noVec.qgt20.1.fq.gz | echo $((`wc -l`/4)) \\
42      zcat fastq/My_sample_2.noVec.qgt20.2.fq.gz | echo $((`wc -l`/4)) \\
43      more fastq/My_sample_2.noVec.qgt20.t.1.fq | echo $((`wc -l`/4)) \\
44      more fastq/My_sample_2.noVec.qgt20.t.2.fq | echo $((`wc -l`/4)) \\
45      zmore fastq/My_sample_2.noVec.qgt20.t.1.fq-common.out.gz | echo $((`wc -l`/4)) \\
46      samtools flagstat mapped_reads/My_sample_2.30.1.sorted.bam > mapped_reads/My_sample_2.30.1.flagstat.txt \\
47      grep ' mapped (' mapped_reads/My_sample_2.30.1.flagstat.txt
    38  zcat fastq/My_sample.1.fq.gz | echo $((`wc -l`/4)) \\
    39  zcat fastq/My_sample.noVec.1.fq.gz | echo $((`wc -l`/4)) \\
    40  zcat fastq/My_sample.noVec.2.fq.gz | echo $((`wc -l`/4)) \\
    41  zcat fastq/My_sample.noVec.qgt20.1.fq.gz | echo $((`wc -l`/4)) \\
    42  zcat fastq/My_sample.noVec.qgt20.2.fq.gz | echo $((`wc -l`/4)) \\
    43  more fastq/My_sample.noVec.qgt20.t.1.fq | echo $((`wc -l`/4)) \\
    44  more fastq/My_sample.noVec.qgt20.t.2.fq | echo $((`wc -l`/4)) \\
    45  zmore fastq/My_sample.noVec.qgt20.t.1.fq-common.out.gz | echo $((`wc -l`/4)) \\
    46  samtools flagstat mapped_reads/My_sample.30.1.sorted.bam > mapped_reads/My_sample.30.1.flagstat.txt \\
    47  grep ' mapped (' mapped_reads/My_sample.30.1.flagstat.txt
48  48
49  49