RNA-seq read count normalization (TPM and RPKM)

#Rscript normalize_featurecounts.R counts_table.txt tpm ;

#Rscript normalize_featurecounts.R counts_table.txt rpkm ;

#Sample table###

#GeneID<TAB>sample1<TAB>sample2<TAB>sample3<TAB>Length

#Gene1<TAB>10<TAB>4<TAB>5<TAB>1500

#Gene2<TAB>20<TAB>43<TAB>60<TAB>4300

args<-commandArgs(TRUE)

ccols <- c(2:30) # sample1_column:sampleN_column

lcol <- 31 # Column containing length of sum of exons per gene (Please place it in last column to avoid confusion)

expr <- read.table(args[1], header=T, sep=“\t“)

rpkm <- function(counts, lengths) {

rate <- counts / lengths

rate / sum(counts) * 1e6

}

tpm <- function(counts, lengths) {

rate <- counts / lengths

rate / sum(rate) * 1e6

}

if(args[2]==“tpm“){

tpms <- apply(expr[,ccols], 2, function(x) tpm(x, expr[,lcol]))

res <- cbind(as.character(expr[,1]),tpms)

colnames(res)[1] <- colnames(expr)[1]

write.table(res,paste0(args[1],“.tpm“), sep=“\t“, row.names=F, quote=F)

}

if(args[2]==“rpkm“){

rpkms <- apply(expr[,ccols], 2, function(x) rpkm(x, expr[,lcol]))

res <- cbind(as.character(expr[,1]),rpkms)

colnames(res)[1] <- colnames(expr)[1]

write.table(res,paste0(args[1],“.rpkm“), sep=“\t“, row.names=F, quote=F)

}

本站原创，如若转载，请注明出处：https://www.ouq.net/3846.html