| 160 | | }} |
| | 162 | }}} |
| | 163 | |
| | 164 | |
| | 165 | === create wiggle files for visualizing paired-end data mapping to the + and - strands === |
| | 166 | |
| | 167 | 1. split by strand by matched strand |
| | 168 | |
| | 169 | {{{ |
| | 170 | # input: accepted_hits.bam |
| | 171 | # output: accepted_hits_negStrand.bam: mapped to negative strand |
| | 172 | # accepted_hits_posStrand.bam: mapped to positive strand |
| | 173 | |
| | 174 | bsub "samtools view -f 16 -b accepted_hits.bam >| accepted_hits_negStrand.bam" |
| | 175 | bsub "samtools view -F 16 -b accepted_hits.bam >| accepted_hits_posStrand.bam" |
| | 176 | |
| | 177 | }}} |
| | 178 | |
| | 179 | 2. split reads by pair |
| | 180 | {{{ |
| | 181 | # input: accepted_hits_posStrand.bam or accepted_hits_negStrand.bam |
| | 182 | # output: 1st pair: *_1stPair.bam |
| | 183 | # 2nd pair: *_2ndPair.bam |
| | 184 | bsub "samtools view -b -f 0x0040 accepted_hits_posStrand.bam > accepted_hits_posStrand_1stPair.bam" |
| | 185 | bsub "samtools view -b -F 0x0040 accepted_hits_posStrand.bam > accepted_hits_posStrand_2ndPair.bam" |
| | 186 | bsub "samtools view -b -f 0x0040 accepted_hits_negStrand.bam > accepted_hits_negStrand_1stPair.bam" |
| | 187 | bsub "samtools view -b -F 0x0040 accepted_hits_negStrand.bam > accepted_hits_negStrand_2ndPair.bam" |
| | 188 | }}} |
| | 189 | |
| | 190 | 3. convert from bam to bedgraph format |
| | 191 | {{{ |
| | 192 | # input: bam format: accepted_hits_*Strand_*Pair.bam |
| | 193 | # /nfs/genomes/mouse_gp_jul_07/anno/mm9.size: length of each chromosome, format like |
| | 194 | # chr1 197195432 |
| | 195 | # output: bedgraph format: accepted_hits_*Strand_*Pair.bedgraph |
| | 196 | bsub "genomeCoverageBed -split -bg -ibam accepted_hits_posStrand_1stPair.bam -g mm9.size >| accepted_hits_posStrand_1stPair.bedgraph" |
| | 197 | bsub "genomeCoverageBed -split -bg -ibam accepted_hits_posStrand_2ndPair.bam -g mm9.size >| accepted_hits_posStrand_2ndPair.bedgraph" |
| | 198 | bsub "genomeCoverageBed -split -bg -ibam accepted_hits_negStrand_1stPair.bam -g mm9.size >| accepted_hits_negStrand_1stPair.bedgraph" |
| | 199 | bsub "genomeCoverageBed -split -bg -ibam accepted_hits_negStrand_2ndPair.bam -g mm9.size >| accepted_hits_negStrand_2ndPair.bedgraph" |
| | 200 | }}} |
| | 201 | |
| | 202 | 4. join the reads sharing the same strand |
| | 203 | {{{ |
| | 204 | # This step is for fr-firststrand library (such as dUTP). which is |
| | 205 | 1+-,1-+,2++,2-- |
| | 206 | |
| | 207 | read1 mapped to ‘+’ strand indicates parental gene on ‘-‘ strand |
| | 208 | read1 mapped to ‘-‘ strand indicates parental gene on ‘+’ strand |
| | 209 | read2 mapped to ‘+’ strand indicates parental gene on ‘+’ strand |
| | 210 | read2 mapped to ‘-‘ strand indicates parental gene on ‘-‘ strand |
| | 211 | |
| | 212 | # input: bedgraph file from the same strand |
| | 213 | # output: merged bedgraph: pos.bedgraph or neg.bedgraph |
| | 214 | unionBedGraphs -i accepted_hits_posStrand_2ndPair.bedgraph accepted_hits_negStrand_1stPair.bedgraph |awk '{ print $1"\t"$2"\t"$3"\t"$4+$5 }' >|pos.bedgraph |
| | 215 | unionBedGraphs -i accepted_hits_posStrand_1stPair.bedgraph accepted_hits_negStrand_2ndPair.bedgraph |awk '{ print $1"\t"$2"\t"$3"\t-"$4+$5 }' >|neg.bedgraph |
| | 216 | }}} |
| | 217 | |
| | 218 | * To check how reads were stranded for strand-specific RNA-seq data, you can use infer_experiment.py from [http://rseqc.sourceforge.net/ RSeQC] |
| | 219 | |
| | 220 | {{{ |
| | 221 | infer_experiment.py -r mm9.refseq.bed12 -i accepted_hits.bam |
| | 222 | }}} |
| | 223 | |
| | 224 | 5. convert bedgraph to bigwig |
| | 225 | |
| | 226 | {{{ |
| | 227 | # get rid of header lines of mm9.size: the header line with "chrom size" is removed |
| | 228 | # input: mm9.size: length of each chromosome |
| | 229 | # output: mm9.size_noHeader |
| | 230 | tail --line=+2 mm9.size > mm9.size_noHeader |
| | 231 | # convert bedgraph to bigwig |
| | 232 | # input: bedgraph file: neg.bedgraph or pos.bedgraph |
| | 233 | # mm9.size_noHeader: length of each chromosome |
| | 234 | # *output: bigwig format: neg.bw or pos.bw |
| | 235 | # neg.bw or pos.bw can be visualized with IGV/UCSC genome browser |
| | 236 | bsub bedGraphToBigWig neg.bedgraph mm9.size_noHeader neg.bw |
| | 237 | bsub bedGraphToBigWig pos.bedgraph mm9.size_noHeader pos.bw |
| | 238 | }}} |