| 2 | |
| 3 | # use macs2 to get peaks: You can use --nomodel --shiftsize fragmentLenth based on NSC/RSC |
| 4 | bsub macs2 callpeak -t IP_1.bam -c control_1.bam -f BAM -g hs -n IP.1_vs_control.1 -B -p 1e-3 |
| 5 | bsub macs2 callpeak -t IP_2.bam -c control_2.bam -f BAM -g hs -n IP.2_vs_control.2 -B -p 1e-3 |
| 6 | |
| 7 | # sort .narrowPeak files ( macs2 output) from best to worst using the -log10(pvalue) column i.e. column 8, and only keep the top 100k peaks |
| 8 | bsub "sort -k 8nr,8nr IP.1_vs_control.1_peaks.narrowPeak |head -n 100000|gzip -c >| IP.1_vs_control.1.regionPeak.gz" |
| 9 | bsub "sort -k 8nr,8nr IP.2_vs_control.2_peaks.narrowPeak |head -n 100000|gzip -c >| IP.2_vs_control.2.regionPeak.gz" |
| 10 | |
| 11 | # estimate Irreproducibility Discovery Rate (IDR) between replicates: |
| 12 | # chromInfo.txt format: chromosome<tab>size |
| 13 | Rscript batch-consistency-analysis.r IP.1_vs_control.1.regionPeak.gz IP.2_vs_control.2.regionPeak.gz -1 rep1_vs_rep2_IDR 0 F p.value chromInfo.txt |
| 14 | |
| 15 | # plot the IDR plots |
| 16 | Rscript batch-consistency-plot.r 1 rep1_vs_rep2_IDR_plot rep1_vs_rep2_IDR |
| 17 | |
| 18 | # generate a conservative and an optimal final set of peak calls: |
| 19 | # IDR cutoff is between 0.01 or 0.05 depends on the number of pre-IDR peaks or size of genomes: |
| 20 | # IDR<=0.05 for < 100K pre-IDR peaks for large genomes (human/mouse) |
| 21 | # IDR <= 0.01 or 0.02 for ~15K to 40K peaks in smaller genomes such as worm |
| 22 | |
| 23 | awk '{ if($NF < 0.05) print $0 }' rep1_vs_rep2_IDR-overlapped-peaks.txt > rep1_vs_rep2_conserved_peaks_by_IDR.txt |