v6 v7 10 10 export PATH=/usr/lib/jvm/java-7-openjdk-amd64/bin:$PATH 11 11 \\ \\ 12 '''Index the reference genome.''' [Need to do just once, with [[|samtools]].]12 1 - '''Index the reference genome.''' [Need to do just once, with [[|samtools]].] 13 13 * samtools faidx /path/to/genome/genome.fa 14 14 \\ 15 '''Create a genome dictionary.''' [Need to do just once, with Picard's [[|CreateSequenceDictionary]].]15 2 - '''Create a genome dictionary.''' [Need to do just once, with Picard's [[|CreateSequenceDictionary]].] 16 16 * java -jar /usr/local/share/picard-tools/CreateSequenceDictionary.jar R=/path/to/genome/genome.fa O=/path/to/genome/genome.dict 17 17 \\ 18 '''Validate VCF file or known variants''' (with GATK's [[|ValidateVariants]])18 3 - '''Validate VCF file or known variants''' (with GATK's [[|ValidateVariants]]) 19 19 * java -jar /usr/local/gatk/GenomeAnalysisTK.jar -T ValidateVariants -R /path/to/genome/genome.fa --variant:VCF SNPs_from_NCBI.sorted.vcf \\ 20 20 Respond to errors (by correcting or removing problematic variants), run command again, etc., until validation is successful. 
\\ … … 22 22 23 23 \\ 24 '''Align reads to genome with [[|bwa]]'''24 4 - '''Align reads to genome with [[|bwa]]''' 25 25 * bsub "bwa aln /path/to/genome/bwa/genome Reads_1.fq > Reads_1.sai" 26 26 * bsub "bwa samse /path/to/genome/bwa/genome Reads_1.sai Reads_1.fq > Reads_1.bwa.sam" 27 27 \\ 28 '''Convert SAM to BAM, sort, and index''' with BaRC's streamlined [[|samtools]] commands28 5 - '''Convert SAM to BAM, sort, and index''' with BaRC's streamlined [[|samtools]] commands 29 29 * bsub /nfs/BaRC_Public/BaRC_code/Perl/SAM_to_BAM_sort_index/ Reads_1.bwa.sam 30 30 \\ 31 '''Mark duplicates''' (multiple identical reads mapped to the same location) \\31 6 - '''Mark duplicates''' (multiple identical reads mapped to the same location) \\ 32 32 Run Picard Tools' [[|MarkDuplicates]] on each sample \\ 33 33 May Need "VALIDATION_STRINGENCY=LENIENT" if you get \\ … … 35 35 * bsub java -jar /usr/local/share/picard-tools/MarkDuplicates.jar I=Reads_1.bwa.sorted.bam O=Reads_1.bwa.dedup.bam M=Reads_1.bwa.dedup.txt VALIDATION_STRINGENCY=LENIENT 36 36 \\ 37 '''Add Read Group header information to each BAM file''' (or GATK won't let you continue) \\37 7 - '''Add Read Group header information to each BAM file''' (or GATK won't let you continue) \\ 38 38 Run Picard Tools' [[|AddOrReplaceReadGroups]] on each sample. \\ 39 39 Specify RGSM (Read Group sample), RGLB (Read Group Library), RGPL (Read Group platform), and RGPU (Read Group platform unit [e.g. 
run barcode]) 40 40 * bsub java -jar /usr/local/share/picard-tools/AddOrReplaceReadGroups.jar I=Reads_1.bwa.dedup.bam O=Reads_1.bwa.dedup.good.bam RGSM=My_sample RGLB=My_project RGPL=illumina RGPU=none VALIDATION_STRINGENCY=LENIENT 41 41 \\ 42 '''Index BAM file(s)''' with [[|samtools]] (optional; for IGV viewing)42 8 - '''Index BAM file(s)''' with [[|samtools]] (optional; for IGV viewing) 43 43 * bsub samtools index Reads_1.bwa.dedup.good.bam 44 44 \\ 45 '''Run Indel Realignment''' (with [[|RealignerTargetCreator]] and [[|IndelRealigner]]) \\45 9 - '''Run Indel Realignment''' (with [[|RealignerTargetCreator]] and [[|IndelRealigner]]) \\ 46 46 * Example 1: java -jar /usr/local/gatk/GenomeAnalysisTK.jar -T RealignerTargetCreator -R human.fasta -I original.bam -known indels.vcf -o realigner.intervals \\ 47 47 * Example 2: java -jar /usr/local/gatk/GenomeAnalysisTK.jar -T IndelRealigner -R human.fasta -I original.bam -known indels.vcf -targetIntervals realigner.intervals -o realigned.bam \\ … … 49 49 * java -jar /usr/local/gatk/GenomeAnalysisTK.jar -T IndelRealigner -R /path/to/genome/genome.fa -I Reads_1.bwa.dedup.good.bam -targetIntervals Reads_1.realigner.intervals -o Reads_1.bwa.dedup.realigned.bam --fix_misencoded_quality_scores 50 50 \\ 51 '''Run Base Recalibration''' ([[|BaseRecalibrator]] and [[|PrintReads]]) \\51 10 - '''Run Base Recalibration''' ([[|BaseRecalibrator]] and [[|PrintReads]]) \\ 52 52 * Example 1: java -jar GenomeAnalysisTK.jar -T BaseRecalibrator -R human.fasta -I realigned.bam -knownSites dbsnp137.vcf -knownSites gold.standard.indels.vcf -o recal.table 53 53 * Example 2: java -jar GenomeAnalysisTK.jar -T PrintReads -R human.fasta -I realigned.bam -BQSR recal.table -o recal.bam \\ … … 63 63 64 64 \\ 65 '''Compress BAM with [[|ReduceReads]]''' [Optional] \\65 11 - '''Compress BAM with [[|ReduceReads]]''' [Optional] \\ 66 66 * Example 1: java -jar GenomeAnalysisTK.jar -T ReduceReads -R human.fasta -I recal.bam -o reduced.bam 67 67 * java -jar 
/usr/local/gatk/GenomeAnalysisTK.jar -T ReduceReads -R /path/to/genome/genome.fa -I Reads_1.bwa.dedup.realigned.recal.bam -o Reads_1.bwa.dedup.realigned.recal.reduced.bam 68 68 \\ 69 '''Finally -- Call variants''' \\69 12 - '''Finally -- Call variants''' \\ 70 70 Run [[|HaplotypeCaller]] ("The HaplotypeCaller is a more recent and sophisticated tool than the UnifiedGenotyper.") 71 71 * Example: java -jar GenomeAnalysisTK.jar -T HaplotypeCaller -R human.fasta -I input.bam -o output.vcf -stand_call_conf 30 -stand_emit_conf 10 -minPruning 3 … … 76 76 * java -jar /usr/local/gatk/GenomeAnalysisTK.jar -T UnifiedGenotyper -R /nfs/genomes/a.thaliana_TAIR_10/fasta_whole_genome/TAIR10.fa -I Reads_1.bwa.dedup.realigned.recal.reduced.bam --dbsnp SNPs_from_NCBI.sorted.vcf -o Reads_1.bwa.raw.snps.indels.UnifiedGenotyper.vcf -stand_call_conf 30 -stand_emit_conf 10 77 77 \\ 78 '''Run Variant Quality Score Recalibration''' ("VQSR", with [[|VariantRecalibrator]] and [[|ApplyRecalibration]]) \\ \\79 '''Run Genotype Phasing and Refinement''' \\ \\80 '''Run Functional Annotation''' ([[|snpEff]] and [[|VariantAnnotator]] [which "parses output from snpEff into a simpler format that is more useful for analysis"])78 13 - '''Run Variant Quality Score Recalibration''' ("VQSR", with [[|VariantRecalibrator]] and [[|ApplyRecalibration]]) \\ \\ 79 14 - '''Run Genotype Phasing and Refinement''' \\ \\ 80 15 - '''Run Functional Annotation''' ([[|snpEff]] and [[|VariantAnnotator]] [which "parses output from snpEff into a simpler format that is more useful for analysis"]) 81 81 * Example 1: java -jar snpEff.jar eff -v -onlyCoding true -i vcf -o gatk GRCh37.64 input.vcf > output.vcf 82 82 * Example 2: java -jar GenomeAnalysisTK.jar -T VariantAnnotator -R human.fasta -A SnpEff --variant original.vcf --snpEffFile snpEff_output.vcf -o annotated.vcf 83 83 84 '''Analyze variant calls''' (with [[|CombineVariants]], [[|SelectVariants]], and [[|VariantEval]]) \\ \\84 16 - '''Analyze variant calls''' (with 
[[|CombineVariants]], [[|SelectVariants]], and [[|VariantEval]]) \\ \\ 85 85