RNA-seq read count normalization (TPM and RPKM)
#Rscript normalize_featurecounts.R counts_table.txt tpm ; |
#Rscript normalize_featurecounts.R counts_table.txt rpkm ; |
#Sample table### |
#GeneID<TAB>sample1<TAB>sample2<TAB>sample3<TAB>Length |
#Gene1<TAB>10<TAB>4<TAB>5<TAB>1500 |
#Gene2<TAB>20<TAB>43<TAB>60<TAB>4300 |
args<-commandArgs(TRUE) |
ccols <- c(2:30) # sample1_column:sampleN_column |
lcol <- 31 # Column containing length of sum of exons per gene (Please place it in last column to avoid confusion) |
expr <- read.table(args[1], header=T, sep=“\t“) |
rpkm <- function(counts, lengths) { |
rate <- counts / lengths |
rate / sum(counts) * 1e6 |
} |
tpm <- function(counts, lengths) { |
rate <- counts / lengths |
rate / sum(rate) * 1e6 |
} |
if(args[2]==“tpm“){ |
tpms <- apply(expr[,ccols], 2, function(x) tpm(x, expr[,lcol])) |
res <- cbind(as.character(expr[,1]),tpms) |
colnames(res)[1] <- colnames(expr)[1] |
write.table(res,paste0(args[1],“.tpm“), sep=“\t“, row.names=F, quote=F) |
} |
if(args[2]==“rpkm“){ |
rpkms <- apply(expr[,ccols], 2, function(x) rpkm(x, expr[,lcol])) |
res <- cbind(as.character(expr[,1]),rpkms) |
colnames(res)[1] <- colnames(expr)[1] |
write.table(res,paste0(args[1],“.rpkm“), sep=“\t“, row.names=F, quote=F) |
} |
本站原创,如若转载,请注明出处:https://www.ouq.net/3846.html