| 181 | | The interface of this command has been slightly modified by BaRC, but the main method remains exactly the same. The modified scripts are in /nfs/BaRC_Public/BaRC_code/R/IDR |
| 182 | | |
| 183 | | The command requires names and lengths of all chromosomes in a file (ex: chromInfo) of format chromosome<tab>size. |
| 184 | | |
| 185 | | {{{ |
| 186 | | # USAGE: batch-consistency-analysis.r [peakfile1] [peakfile2] [peak.half.width] [outfile.prefix] [min.overlap.ratio] [is.broadpeak] [ranking.measure]\ |
| 187 | | # |
| 188 | | # peak.half.width => set the peak.half.width to the reported peak width in the peak files (-1 (minus one) is recommended) |
| 189 | | # outfile.prefix => the one-word name of your analysis |
| 190 | | # min.overlap.ratio => fractional bp overlap (0 to 1) between peaks in replicates to be considered as overlapping peaks. (0 is recommended) |
| 191 | | # is.broadpeak => Is the peak file format broadPeak (or else is narrowPeak)? (F for false) The IDR method is not recommended for broad peaks. |
| 192 | | # ranking.measure => p.value is recommended |
| 193 | | |
| 194 | | batch-consistency-analysis.r IP.1_vs_control.1.regionPeak.gz IP.2_vs_control.2.regionPeak.gz -1 rep1_vs_rep2_IDR 0 F p.value chromInfo.txt |
| 195 | | }}} |
| 196 | | |
| 197 | | ==== Create the IDR plots ==== |
| 198 | | {{{ |
| 199 | | # USAGE: batch-consistency-plot.r [npairs] [output.prefix] [input.file.prefix1] [input.file.prefix2] [input.file.prefix3] .... |
| 200 | | # This method can plot 1 or more pairs of replicates |
| 201 | | |
| 202 | | batch-consistency-plot.r 1 rep1_vs_rep2_IDR_plot rep1_vs_rep2_IDR |
| 203 | | }}} |
| 204 | | |
| 205 | | ==== Generate a conservative and an optimal final set of peak calls ==== |
| 206 | | {{{ |
| 207 | | # Use an IDR cutoff of 0.01 to 0.05, depending on the number of pre-IDR peaks and size of the genome: |
| 208 | | # IDR <= 0.05 for < 100K pre-IDR peaks for large genomes (human/mouse) |
| 209 | | # IDR <= 0.01 or 0.02 for ~15K to 40K peaks in smaller genomes such as worm |
| 210 | | # awk note: $NF refers to the last data column which (in the *overlapped-peaks.txt file) is the IDR |
| 211 | | |
| 212 | | awk '{ if($NF < 0.05) print $0 }' rep1_vs_rep2_IDR-overlapped-peaks.txt > rep1_vs_rep2_conserved_peaks_by_IDR.txt |
| | 181 | For full usage, try 'idr h' |
| | 182 | {{{ |
| | 183 | idr --samples My_TF_rep_1.narrowPeak My_TF_rep_2.narrowPeak --input-file-type narrowPeak --output-file My_TF.IDR.txt --plot |