Question

我有一个像这样的data.frame：

df=head(df)       
                                                                                                        t20coreB t20crustB t30crustB t0core t0crust
CellularProcesses 0 1 0 0 0
CellularProcesses2 0 1 0 0 0
CellularProcesses3 0 0 4 4 0
CellularProcesses4 0 0 0 0 0
CellularProcesses5 0 1 0 0 0
CellularProcesses6 12 6 1 12 13

                                                                                                        t10coreA t10crustA t10coreB t10crustB
CellularProcesses 0 0 0 0
CellularProcesses2 0 0 0 0
CellularProcesses3 0 1 15 0
CellularProcesses4 1 0 0 0
CellularProcesses5 1 0 0 0
CellularProcesses6 0 11 5 1

                                                                                                        t30coreA t30crustA t30coreB
CellularProcesses 0 0 0
CellularProcesses2 0 0 0
CellularProcesses3 0 3 1
CellularProcesses4 2 1 0
CellularProcesses5 0 0 1
CellularProcesses6 1 28 5

我希望将每列中的值除以最后一行（在相应列中）中的值。

我试过了：

df.normalized=df[1:3,]/df[4,]

但它没有用，我收到了错误。

有什么建议吗？

Answer 1

您还可以将其保留为data.frame

# Drop the "." separators and turn every column into numeric, in place
# (assigning into `df[]` keeps the data.frame structure).
df[] <- lapply(df, function(v) as.numeric(gsub(".", "", v, fixed = TRUE)))
# Expand row 4 to the shape of the remaining rows via the column-index
# matrix, so each cell is divided by the row-4 value of its own column.
res <- df[-4, ] / unlist(df[4, ])[col(df)[-4, ]]

或者

# After transposing, columns become rows, so the row-4 values recycle down
# each column in the right order; transpose back to restore the layout.
# The result is a matrix rather than a data.frame.
res <- t(t(df[1:3, ]) / unlist(df[4, ], use.names = FALSE))

# First column of the result ...
res[, 1]
#[1] 1.638736e-06 3.277471e-07 7.374310e-06

# ... matches the direct element-wise computation for that column.
df[1:3, 1] / df[4, 1]
#[1] 1.638736e-06 3.277471e-07 7.374310e-06

更新

更新后的数据各列均为numeric，并且最后一行是第6行，因此改用第6行作为除数：

 # Divide every row except row 6 by the row-6 value of the same column.
 res <- df[-6, ] / unlist(df[6, ])[col(df)[-6, ]]
 # Column 6 has a zero in row 6, hence the NaN / Inf below.
 res[, 6]
 #[1] NaN NaN NaN Inf Inf

您会得到NaN和Inf，是因为第6列最后一行的值（即除数）为0：

 # Column 6: the last entry (the divisor) is 0.
 df[[6]]
 #[1] 0 0 0 1 1 0

 # Zero divided by zero is NaN; a non-zero value divided by zero is Inf.
 0 / 0
 #[1] NaN
 1 / 0
 #[1] Inf

如果您希望将任何列中的NaN转换为0

 # Zero out NaN entries column by column; assigning into `res[]` keeps the
 # data.frame structure.
 res[] <- lapply(res, function(column) {
   column[is.nan(column)] <- 0
   column
 })

如果要将NaN和Inf都转换为0

 # Zero out every non-finite entry (NaN, Inf, -Inf and also NA) column by
 # column; assigning into `res[]` keeps the data.frame structure.
 res[] <- lapply(res, function(column) {
   column[!is.finite(column)] <- 0
   column
 })

NaN的另一个选项是

 # Build the NaN mask numerically instead of comparing against the string
 # 'NaN', which only works through implicit number-to-character coercion.
 # is.nan() keeps the matrix dimensions, so the mask lines up with `res`.
 res[is.nan(as.matrix(res))] <- 0

数据

# Example data: columns S1-S12, rows "A", "B", "C" and "Reads".
# Every column except S6 is character, with "." inside the large values in
# the "Reads" row; S6 is already numeric.
df <- data.frame(
  S1 = c("10", "2", "45", "6.102.266"),
  S2 = c("20", "5", "8", "8.392.734"),
  S3 = c("5", "88", "74", "6.329.533"),
  S4 = c("6", "32", "5", "6.393.165"),
  S5 = c("18", "85", "61", "7.127.333"),
  S6 = c(12, 45, 23, 810.613),
  S7 = c("7", "6", "2", "5.832.144"),
  S8 = c("8", "1", "20", "8.560.084"),
  S9 = c("91", "20", "0", "9.133.783"),
  S10 = c("12", "33", "8", "3.537.480"),
  S11 = c("11", "22", "9", "24.708.786"),
  S12 = c("75", "85", "55", "5.928.850"),
  row.names = c("A", "B", "C", "Reads"),
  stringsAsFactors = FALSE
)

新数据

# Updated example data: 12 integer columns and 6 rows; the row names are
# the full annotation strings. Row 6 is the row used as the divisor.
df <- data.frame(
  t20coreB = c(0L, 0L, 0L, 0L, 0L, 12L),
  t20crustB = c(1L, 1L, 0L, 0L, 1L, 6L),
  t30crustB = c(0L, 0L, 4L, 0L, 0L, 1L),
  t0core = c(0L, 0L, 4L, 0L, 0L, 12L),
  t0crust = c(0L, 0L, 0L, 0L, 0L, 13L),
  t10coreA = c(0L, 0L, 0L, 1L, 1L, 0L),
  t10crustA = c(0L, 0L, 1L, 0L, 0L, 11L),
  t10coreB = c(0L, 0L, 15L, 0L, 0L, 5L),
  t10crustB = c(0L, 0L, 0L, 0L, 0L, 1L),
  t30coreA = c(0L, 0L, 0L, 2L, 0L, 1L),
  t30crustA = c(0L, 0L, 3L, 1L, 0L, 28L),
  t30coreB = c(0L, 0L, 1L, 0L, 1L, 5L),
  row.names = c(
    "CellularProcesses;Transportandcatabolism;Lysosome[PATH:ko04142];E32125,MANBA,manB;beta-mannosidase[EC:32125]",
    "CellularProcesses;Transportandcatabolism;Lysosome[PATH:ko04142];GBA,srfJ;glucosylceramidase[EC:32145]",
    "CellularProcesses;Transportandcatabolism;Lysosome[PATH:ko04142];HEXA_B;hexosaminidase[EC:32152]",
    "CellularProcesses;Transportandcatabolism;Lysosome[PATH:ko04142];NEU1;sialidase-1[EC:32118]",
    "CellularProcesses;Transportandcatabolism;Lysosome[PATH:ko04142];uidA,GUSB;beta-glucuronidase[EC:32131]",
    "CellularProcesses;Transportandcatabolism;Peroxisome[PATH:ko04146];ACSL,fadD;long-chainacyl-CoAsynthetase[EC:6213]"
  )
)

Answer 2

如果将数据框转换为数字矩阵，它将起作用：

# Strip the "." separators and convert every column to numeric; the result
# is a numeric matrix with one column per column of `df`.
mat <- vapply(
  df,
  function(x) as.numeric(gsub(".", "", x, fixed = TRUE)),
  numeric(nrow(df))
)
# Divide each column of rows 1-3 by that column's value in row 4.
# A plain `mat[1:3, ] / mat[4, ]` would recycle the divisor down the columns
# (column-major order) and pair most cells with another column's total,
# without any warning; sweep() over margin 2 applies it per column.
sweep(mat[1:3, ], 2, mat[4, ], "/")

如何将data.frame中的所有值除以一行

2 个答案:

更新

数据

新数据