找到各轮廓(profile,即数据框的各行)之间的最小距离

时间:2016-10-06 09:20:11

标签: r

我想找到存储在数据框中的各个轮廓(profile,即每一行)之间的最小距离。具体来说,我只关心其中某一行与数据框中其余各行之间的距离。

这是一个数据框:

# Example data: 25 named profiles (rows) measured at 20 positions (columns).
# The result is bound to `df` because the answer code indexes `df`; without the
# assignment, `df` would resolve to the stats::df function and subsetting fails.
df <- structure(list(
  `10` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  `34` = c(0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 393090, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6718400,
           0, 311350, 0),
  `59` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2164949.7,
           4834137.6, 0, 0, 0, 1187816.7, 0, 0, 0, 0, 0, 0, 1340912.5, 0),
  `84` = c(0, 0, 0, 0, 0, 0, 0, 0, 8607100, 0, 0, 17586713.2,
           22629743.6, 0, 0, 0, 2808791.7, 0, 0, 4026222.5, 0, 0, 0, 1981900,
           0),
  `110` = c(2296000, 0, 0, 0, 0, 2140221.7, 0, 0, 5809230.6,
            0, 0, 37134898.5, 3861828.7, 2553100, 0, 12075845.8, 0, 0, 1272950,
            8695273, 0, 0, 2657180, 2710080, 0),
  `134` = c(0, 0, 0, 1176150,
            0, 1329596.7, 1471000, 0, 6511934, 6511934, 0, 18709227.3, 0,
            1041211.2, 0, 6544176.9, 0, 0, 2412651.7, 7724956.9, 2878418.3,
            0, 8620131.7, 2386972.8, 0),
  `165` = c(0, 1226610, 0, 1345098.7,
            2083771.9, 0, 1808231.4, 0, 0, 10742997.7, 0, 13060798.9, 0,
            538340, 538340, 2791649.5, 0, 0, 6217622, 1316097.1, 4716931.8,
            0, 6615816.9, 1510532, 0),
  `199` = c(0, 1571525, 0, 1903038.3,
            1676700, 0, 888832.2, 0, 0, 9084418.6, 0, 11189460.1, 0, 0, 1807662.5,
            2564275, 0, 0, 18080359.7, 0, 0, 0, 2397710.2, 1717949.2, 0),
  `234` = c(0, 1314900, 2482696, 1325684, 0, 0, 0, 0, 0, 7321432.7,
            0, 9843409.2, 0, 0, 1073341.7, 2762775, 0, 0, 9335312.8,
            0, 0, 0, 1950788.2, 1509100, 0),
  `257` = c(0, 1568700, 14604298.7,
            940162.2, 0, 0, 0, 0, 0, 4779505.9, 0, 9691692.4, 0, 0, 735290,
            2650165, 0, 2311383.7, 5193383.4, 0, 0, 0, 1341998.7, 1225325.6,
            0),
  `362` = c(0, 0, 4190740.5, 288800, 0, 0, 0, 0, 0, 4846634.8,
            0, 9574498.7, 0, 0, 0, 1425600, 0, 8339312.1, 3877892.5,
            0, 0, 0, 1752866.7, 0, 0),
  `433` = c(0, 0, 773280, 0, 0,
            0, 0, 0, 0, 3926582.8, 3926582.8, 5962586.5, 0, 0, 0, 1041400,
            0, 1972909.3, 1895439.4, 0, 0, 0, 963891.2, 0, 1109800),
  `506` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9332272, 0, 0, 0,
            0, 0, 0, 2219100, 0, 0, 0, 0, 0, 0, 0),
  `581` = c(0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 4371537.1, 0, 0, 0, 0, 0, 0, 2428800,
            0, 0, 0, 0, 0, 0, 0),
  `652` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 1689871.4, 0, 0, 0, 0, 0, 0, 988399.7, 0, 0, 0, 0, 0,
            0, 0),
  `733` = c(0, 0, 0, 0, 0, 0, 0, 1250100, 0, 0, 1754205.3,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  `818` = c(0, 0,
            0, 0, 0, 0, 0, 517340, 0, 0, 1149227.6, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0),
  `896` = c(0, 0, 0, 0, 0, 0, 0, 579846.7,
            0, 0, 985931.2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  `972` = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 858255.5, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
  `1039` = c(0, 0, 0, 0,
             0, 0, 0, 0, 0, 0, 848993.3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0, 0)),
  .Names = c("10", "34", "59", "84", "110", "134",
             "165", "199", "234", "257", "362", "433", "506", "581", "652",
             "733", "818", "896", "972", "1039"),
  row.names = c("Mark_1",
                "Mark_2", "Alex_1", "Katrin_1", "Georg_1", "Martin_1",
                "Tim_1", "Tom_1", "Mike_1", "Mike_2", "Mike_3",
                "Hare_1", "Dea_1", "Monty_1", "Monty_2", "Niko_1",
                "Lee_1", "Marq_1", "Otto_1", "Priaq_1", "Surkta_1",
                "Norsa_1", "Norsa_2", "Quer_1", "Quer_2"),
  class = "data.frame")

名为Katrin_1的行就是我感兴趣的行。我想找出与Katrin_1的欧氏距离最小的那些行,比如最接近的3到5行。

1 个答案:

答案 0 :(得分:4)

先用df[!rownames(df) %in% "Katrin_1", ]去掉Katrin_1这一行,再用sweep从剩下的每一行中减去df["Katrin_1", ];把差值平方后用rowSums按行求和,得到各行到Katrin_1的欧氏距离的平方,最后用which.min取出距离最小的那一行:

# Squared Euclidean distance from every other row of `df` to the "Katrin_1" row.
# Squaring is monotone for non-negative values, so ranking by squared distance
# gives the same order as ranking by the distance itself.
sq_dist <- rowSums(sweep(df[!rownames(df) %in% "Katrin_1", ], 2, as.numeric(df["Katrin_1", ]), `-`)^2)

# Single closest row.
names(which.min(sq_dist))
# [1] "Mark_2"

# The k closest rows, nearest first, reported as true Euclidean distances
# (the question asks for the 3-5 nearest rows, not just one).
k <- 5
sqrt(head(sort(sq_dist), k))

这应该比使用dist更有效率,因为dist会计算所有行两两之间的距离,而我们只需要各行到Katrin_1这一行的距离。