RNA-seq read count normalization (TPM and RPKM)
| #Rscript normalize_featurecounts.R counts_table.txt tpm ; |
| #Rscript normalize_featurecounts.R counts_table.txt rpkm ; |
| #Sample table### |
| #GeneID<TAB>sample1<TAB>sample2<TAB>sample3<TAB>Length |
| #Gene1<TAB>10<TAB>4<TAB>5<TAB>1500 |
| #Gene2<TAB>20<TAB>43<TAB>60<TAB>4300 |
| args<-commandArgs(TRUE) |
| ccols <- c(2:30) # sample1_column:sampleN_column |
| lcol <- 31 # Column containing length of sum of exons per gene (Please place it in last column to avoid confusion) |
| expr <- read.table(args[1], header=T, sep=“\t“) |
| rpkm <- function(counts, lengths) { |
| rate <- counts / lengths |
| rate / sum(counts) * 1e6 |
| } |
| tpm <- function(counts, lengths) { |
| rate <- counts / lengths |
| rate / sum(rate) * 1e6 |
| } |
| if(args[2]==“tpm“){ |
| tpms <- apply(expr[,ccols], 2, function(x) tpm(x, expr[,lcol])) |
| res <- cbind(as.character(expr[,1]),tpms) |
| colnames(res)[1] <- colnames(expr)[1] |
| write.table(res,paste0(args[1],“.tpm“), sep=“\t“, row.names=F, quote=F) |
| } |
| if(args[2]==“rpkm“){ |
| rpkms <- apply(expr[,ccols], 2, function(x) rpkm(x, expr[,lcol])) |
| res <- cbind(as.character(expr[,1]),rpkms) |
| colnames(res)[1] <- colnames(expr)[1] |
| write.table(res,paste0(args[1],“.rpkm“), sep=“\t“, row.names=F, quote=F) |
| } |
本站原创,如若转载,请注明出处:https://www.ouq.net/3846.html
微信打赏,为服务器增加50M流量
支付宝打赏,为服务器增加50M流量