| 68 | {{{ |
| 69 | grep -v ^\@ foo.sam | awk -F"\t" '{ if($2==0 && $13=="MD:Z:20") print $3 }' | sort | uniq -c | awk '{ print $2"\t"$1 }' |
| 70 | }}} |
| 71 | |
| 72 | |
| 73 | ==== MAGeCK ==== |
| 74 | |
| 75 | * [[https://genomebiology.biomedcentral.com/articles/10.1186/s13059-014-0554-4 | Publication]] |
| 76 | * [[https://sourceforge.net/projects/mageck/ | MAGeCK Home/Download Page]] |
| 77 | * [[https://sourceforge.net/p/mageck/wiki/demo/ | Tutorial]] |
| 78 | |
| 79 | |
| 80 | ==== Test: compare two conditions ==== |
| 81 | |
| 82 | * Common usage to test, or compare, two conditions |
| 83 | |
| 84 | |
| 85 | {{{ |
| 86 | mageck test -k count_matrix.txt -t top1,top2 -c bot1,bot2 -n mageck_out.txt |
| 87 | # the options -t and -c specify the treatment and control samples, respectively. |
| 88 | }}} |
| 89 | |
| 90 | |
| 91 | The column names of the input file, count_matrix.txt, must match the arguments given to -c and -t, e.g. |
| 92 | |
| 93 | |
| 94 | {{{ |
| 95 | |sgRNA|gene|bot1|bot2|top1|top2| |
| 96 | |sgACTL7A_2|ACTL7A|32|14|10|26| |
| 97 | |sgACTL7A_3|ACTL7A|44|40|82|118| |
| 98 | |sgACTL7A_4|ACTL7A|64|61|418|313| |
| 99 | |sgACTL7A_5|ACTL7A|9|0|17|74| |
| 100 | |sgACTL7A_6|ACTL7A|42|5|47|166| |
| 101 | |sgACTL7A_7|ACTL7A|14|32|23|60| |
| 102 | }}} |
| 103 | |
| 104 | |
| 105 | The output files include, |
| 106 | - .summary.pdf file which summarizes (only) the top hits, and also includes a waterfall plot. |
| 107 | - .gene_summary.txt results summarized by gene (for all genes) |
| 108 | - .sgrna_summary.txt results by guide (for all guides); this file can be made into a matrix using a few UNIX commands, e.g. |
| 109 | |
| 110 | |
| 111 | {{{ |
| 112 | #get only the columns of interest: Gene, sgrna, control_mean, treat_mean |
| 113 | cut -f 1,2,5,6 mageck.sgrna_summary.txt | awk '{print $2"\t"$1"\t"$3"\t"$4}' > CRISPR_score_sgRNA.txt |
| 114 | |
| 115 | #convert the single column into a (wide) matrix, each column is a guide and each row is a gene |
| 116 | grep -v crispr_sgRNA.txt | sed 's/_/\t/' | sort -k 1,1 -k 2,2 -k 3,3n | awk -F '\t' '{print $1"\t"$2"_"$3"\t"$4"\t"$5}' | grep -v INTERGENIC | grep -v CTRL0 | cut -f 1,4 | groupBy -g 1 -c 2 -o collapse |sed 's/,/\t/g' > CRISPR_score_sgRNA.txt |
| 117 | }}} |
| 118 | |
| 119 | |