用于收集的多个键值对

时间:2021-02-10 17:37:33

标签: r dplyr tidyr

我在用多个键值对整理数据时遇到了很大的困难。基本上,每个人都有两个数据点。在这两个数据点之间发生变化的每个变量都用 _1 或 _2 后缀表示。例如,我想将 Age_1 和 Age_2 折叠为时间(时间 1 和时间 2)与年龄的键值对。但我也想将 Test.date_1 和 Test.date_2 折叠成时间与测试日期的键值对。我估计大约有 40 个变量需要这样处理。基本上,我希望最终结果看起来像这样:(原帖此处为一张示例图片:enter image description here)

这是我的数据的一个子集

structure(list(PARTID = c("AGE004", "AGE005", "AGE007", "AGE012", 
"AGE022", "AGE026"), Phase_1 = c("Phase One", "Phase One", "Phase One", 
"Phase One", "Phase One", "Phase One"), Age_1 = c(34L, 27L, 34L, 
35L, 19L, 19L), Sex = c(2L, 1L, 1L, 2L, 1L, 2L), Handedness = c(1L, 
1L, 0L, 1L, 1L, 1L), Test.date_1 = c("14/02/2013", "24/02/2013", 
"25/02/2013", "15/04/2013", "23/04/2013", "20/05/2013"), PartID_2 = c("BGE004", 
"BGE005", "CGE007", "BGE012", "BGE022", "BGE026"), Phase_2 = c("Phase Two", 
"Phase Two", "Phase Three", "Phase Two", "Phase Two", "Phase Two"
), Age_2 = c(37L, 30L, 39L, 38L, 22L, 22L), Test.date_2 = c("30/07/2015", 
"28/07/2015", "21/08/2017", "27/05/2016", "31/05/2016", "3/03/2016"
), RART_1 = c(606.045488, 497.252507, 620.9270198, 667.98753, 
553.8135176, 609.2147245), RMST_1 = c(606.7195939, 612.5271588, 
748.2943148, 723.4190089, 679.2795688, 659.0553977), STRT_1 = c(762.7184264, 
672.3305307, 907.0870536, 821.0780768, 829.0058858, 716.4811003
), SART_1 = c(1005.309323, 844.5555086, 955.1771471, 949.6397829, 
942.4323483, 897.528926), NIRT_1 = c(1001.370827, 793.4688684, 
878.7502713, 898.2766888, 863.6903505, 876.6728154), RNRT_1 = c(915.2199074, 
749.2448674, 771.7413651, 752.5620404, 786.4171282, 877.0446777
), SNRT_1 = c(1088.598633, 841.6114517, 998.3484605, 1049.322361, 
952.1625906, 876.3100229), RAER_1 = c(0.699300699, 2.857142857, 
1.408450704, 2.142857143, 0.699300699, 2.857142857), RMER_1 = c(2.285714286, 
1.129943503, 1.142857143, 1.724137931, 1.694915254, 1.704545455
), STER_1 = c(0.568181818, 1.704545455, 4.597701149, 1.734104046, 
1.694915254, 2.259887006), SAER_1 = c(0.555555556, 2.89017341, 
1.754385965, 2.793296089, 1.704545455, 2.259887006), NIER_1 = c(2.808988764, 
2.824858757, 4.545454545, 2.840909091, 2.247191011, 2.298850575
), RNER_1 = c(3.370786517, 0, 1.136363636, 0, 1.123595506, 3.488372093
), SNER_1 = c(2.247191011, 5.617977528, 7.954545455, 5.681818182, 
3.370786517, 1.136363636), MixingCostRT_1 = c(0.6741059, 115.2746518, 
127.367295, 55.4314789, 125.4660512, 49.8406732), STCostRT_1 = c(155.9988325, 
59.8033719, 158.7927388, 97.6590679, 149.726317, 57.4257026), 
    SACostRT_1 = c(398.5897291, 232.0283498, 206.8828323, 226.220774, 
    263.1527795, 238.4735283), NICostRT_1 = c(394.6512331, 180.9417096, 
    130.4559565, 174.8576799, 184.4107817, 217.6174177), RNCostRT_1 = c(308.5003135, 
    136.7177086, 23.4470503, 29.1430315, 107.1375594, 217.98928
    ), SNCostRT_1 = c(481.8790391, 229.0842929, 250.0541457, 
    325.9033521, 272.8830218, 217.2546252), MixingPropRT_1 = c(0.001112302, 
    0.231823169, 0.20512442, 0.082982805, 0.226549276, 0.08181134
    ), STPropRT_1 = c(0.257118501, 0.097633829, 0.212206261, 
    0.134996547, 0.220419285, 0.087133347), SAPropRT_1 = c(0.656958722, 
    0.378804999, 0.276472543, 0.312710575, 0.387399815, 0.361841401
    ), NIPropRT_1 = c(0.650467262, 0.295401938, 0.174337762, 
    0.241710099, 0.271479948, 0.330195942), RNPropRT_1 = c(0.50847264, 
    0.223202688, 0.031333995, 0.040285134, 0.157722335, 0.330760177
    ), SNPropRT_1 = c(0.794236817, 0.373998589, 0.334165502, 
    0.450504269, 0.401724171, 0.329645468), MixingCostER_1 = c(1.586413587, 
    -1.727199354, -0.265593561, -0.418719212, 0.995614555, -1.152597402
    ), STCostER_1 = c(-1.717532468, 0.574601952, 3.454844006, 
    0.009966115, 0, 0.555341551), SACostER_1 = c(-1.73015873, 
    1.760229907, 0.611528822, 1.069158158, 0.009630201, 0.555341551
    ), NICostER_1 = c(0.523274478, 1.694915254, 3.402597402, 
    1.11677116, 0.552275757, 0.59430512), RNCostER_1 = c(1.085072231, 
    -1.129943503, -0.006493507, -1.724137931, -0.571319748, 1.783826638
    ), SNCostER_1 = c(-0.038523275, 4.488034025, 6.811688312, 
    3.957680251, 1.675871263, -0.568181819), MixingPropER_1 = c("2.26857143", 
    "-0.604519774", "-0.188571428", "-0.195402299", "1.423728814", 
    "-0.403409091"), STPropER_1 = c(-0.751420455, 0.508522727, 
    3.022988505, 0.005780347, 0, 0.325800376), SAPropER_1 = c(-0.756944444, 
    1.557803467, 0.535087719, 0.620111732, 0.005681819, 0.325800376
    ), NIPropER_1 = c(0.228932584, 1.5, 2.977272726, 0.647727273, 
    0.325842697, 0.348659004), RNPropER_1 = c(0.474719101, -1, 
    -0.005681819, -1, -0.337078651, 1.046511627), SNPropER_1 = c(-0.016853933, 
    3.971910112, 5.960227272, 2.295454546, 0.988764045, -0.333333334
    ), `_` = c(NA, NA, NA, NA, NA, NA), RART_2 = c(534.6624201, 
    551.4502338, 708.557581, 766.713627, 595.5418578, 622.2098214
    ), RMS_T = c(520.2972412, 626.2751518, 743.1781747, 822.0529381, 
    615.4418945, 683.4996235), STRT_2 = c(739.1043527, 731.2613225, 
    876.2105908, 911.3537016, 754.9962198, 714.9285414), SART_2 = c(901.0522605, 
    865.5911959, 900.192916, 1030.688477, 835.2385876, 823.8851417
    ), NIRT_2 = c(847.4667198, 826.9730689, 929.7489006, 998.631643, 
    800.8631764, 830.4781627), RNRT_2 = c(682.2760595, 801.8197073, 
    829.0902071, 948.3103198, 713.0618405, 829.2444741), SNRT_2 = c(1000.111254, 
    852.1264305, 1036.621094, 1052.059221, 894.7618273, 831.6824777
    ), RAER_2 = c(2.857142857, 0, 0, 2.857142857, 1.408450704, 
    0.699300699), RMER_2 = c(1.694915254, 1.754385965, 0, 1.129943503, 
    1.136363636, 1.129943503), STER_2 = c(2.840909091, 2.824858757, 
    0, 0, 3.409090909, 0.561797753), SAER_2 = c(0, 2.272727273, 
    0.558659218, 0.558659218, 1.714285714, 1.685393258), NIER_2 = c(1.117318436, 
    3.389830508, 1.111111111, 1.111111111, 3.409090909, 1.685393258
    ), RNER_2 = c(1.123595506, 3.409090909, 1.111111111, 0, 2.272727273, 
    3.370786517), SNER_2 = c(1.111111111, 3.370786517, 1.111111111, 
    2.222222222, 4.545454545, 0), MixingCostRT_2 = c(-14.3651789, 
    74.824918, 34.6205937, 55.3393111, 19.9000367, 61.2898021
    ), STCostRT_2 = c(218.8071115, 104.9861707, 133.0324161, 
    89.3007635, 139.5543253, 31.4289179), SACostRT_2 = c(380.7550193, 
    239.3160441, 157.0147413, 208.6355389, 219.7966931, 140.3855182
    ), NICostRT_2 = c(327.1694786, 200.6979171, 186.5707259, 
    176.5787049, 185.4212819, 146.9785392), RNCostRT_2 = c(161.9788183, 
    175.5445555, 85.9120324, 126.2573817, 97.619946, 145.7448506
    ), SNCostRT_2 = c(479.8140128, 225.8512787, 293.4429193, 
    230.0062829, 279.3199328, 148.1828542), MixingPropRT_2 = c(-0.026867755, 
    0.135687526, 0.048860664, 0.072177289, 0.033415009, 0.098503431
    ), STPropRT_2 = c(0.420542517, 0.167635855, 0.17900474, 0.108631402, 
    0.226754673, 0.045982349), SAPropRT_2 = c(0.731802879, 0.382126041, 
    0.211274694, 0.253798179, 0.357136385, 0.205392239), NIPropRT_2 = c(0.628812634, 
    0.320462845, 0.251044409, 0.214802109, 0.30128154, 0.215038215
    ), RNPropRT_2 = c(0.311319772, 0.28029941, 0.115600855, 0.153587897, 
    0.158617648, 0.213233257), SNPropRT_2 = c(0.922192114, 0.360626281, 
    0.394848677, 0.279794977, 0.453852647, 0.216800199), MixingCostER_2 = c(-1.162227603, 
    1.754385965, 0, -1.727199354, -0.272087068, 0.430642804), 
    STCostER_2 = c(1.145993837, 1.070472792, 0, -1.129943503, 
    2.272727273, -0.56814575), SACostER_2 = c(-1.694915254, 0.518341308, 
    0.558659218, -0.571284285, 0.577922078, 0.555449755), NICostER_2 = c(-0.577596818, 
    1.635444543, 1.111111111, -0.018832392, 2.272727273, 0.555449755
    ), RNCostER_2 = c(-0.571319748, 1.654704944, 1.111111111, 
    -1.129943503, 1.136363637, 2.240843014), SNCostER_2 = c(-0.583804143, 
    1.616400552, 1.111111111, 1.092278719, 3.409090909, -1.129943503
    ), MixingPropER_2 = c("-0.406779661", "", "", "-0.604519774", 
    "-0.193181818", "0.61581921"), STPropER_2 = c(0.676136364, 
    0.610169491, NA, -1, 2.000000001, -0.502808989), SAPropER_2 = c(-1, 
    0.295454546, NA, -0.505586592, 0.508571429, 0.491573033), 
    NIPropER_2 = c(-0.340782123, 0.932203389, NA, -0.016666667, 
    2.000000001, 0.491573033), RNPropER_2 = c(-0.337078651, 0.943181818, 
    NA, -1, 1.000000001, 1.983146067), SNPropER_2 = c(-0.344444444, 
    0.921348315, NA, 0.966666666, 3.000000001, -1)), row.names = c(NA, 
6L), class = "data.frame")

这是我迄今为止尝试过的:

library(data.table)

# Reshape the wide, one-row-per-participant `data` (the data.frame shown in the
# question) into one row per participant and time point.
#
# Work on a data.table copy: `:=` is only defined for data.tables, and this
# leaves the original `data` untouched.
DT <- as.data.table(data)
id_cols <- c("PARTID", "Sex", "Handedness")

# Only columns ending in _1 / _2 are repeated measures. Columns without such a
# suffix (e.g. `_` and RMS_T in the sample data) are deliberately left out of
# the reshape instead of being split into meaningless pieces.
time_cols <- grep("_[12]$", names(DT), value = TRUE)

# melt() stacks every measure into a single `value` column, so give them one
# common type explicitly rather than relying on implicit coercion.
DT[, (time_cols) := lapply(.SD, as.character), .SDcols = time_cols]

DT.m1 <- melt(DT, id.vars = id_cols, measure.vars = time_cols,
              variable.factor = FALSE)

# "Age_1" splits into the variable name ("Age") first and the time index ("1")
# second, so the target names must be given in that order. The index is called
# TimePoint because "Phase" is itself one of the measured variables
# (Phase_1 / Phase_2) and would otherwise collide in the cast below.
DT.m1[, c("variable", "TimePoint") := tstrsplit(variable, "_", fixed = TRUE)]

DT.c1 <- dcast(DT.m1, PARTID + Sex + Handedness + TimePoint ~ variable,
               value.var = "value")

# Everything went through character; restore numeric/integer columns where the
# contents allow it (as.is = TRUE keeps strings as character, not factor).
DT.c1[, (names(DT.c1)) := lapply(.SD, type.convert, as.is = TRUE)]
DT.c1

1 个答案:

答案 0 :(得分:2)

一种方法是先用 pivot_longer 把以 _1/_2 结尾的列转成长格式,然后再用 pivot_wider 展开。

我们需要将所有列转换为字符,以便它们可以暂时共存于同一列中。这可能会导致精度的细微损失。

另请注意,您的数据既不包含 RMST_2,也不包含 PartID_1。

library(tidyverse)
data %>%
  mutate(across(everything(),as.character)) %>%
  pivot_longer(matches("_[12]$"), names_sep = "_", names_to = c("Variable","TimePoint")) %>%
  pivot_wider(names_from = Variable, values_from = "value") %>%
  mutate(across(everything(),type.convert))
# A tibble: 12 x 48
   PARTID Sex Handedness `_` RMS_T TimePoint Phase Age Test.date PartID RART RMST STRT SART NIRT RNRT SNRT RAER RMER STER
   <fct> <int> <int> <lgl> <dbl> <int> <fct> <int> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 AGE004 2 1 NA 520. 1 Phas… 34 14/02/20… NA 606. 607. 763. 1005. 1001. 915. 1089. 0.699 2.29 0.568
 2 AGE004 2 1 NA 520. 2 Phas… 37 30/07/20… BGE004 535. NA 739. 901. 847. 682. 1000. 2.86 1.69 2.84
 3 AGE005 1 1 NA 626. 1 Phas… 27 24/02/20… NA 497. 613. 672. 845. 793. 749. 842. 2.86 1.13 1.70
 4 AGE005 1 1 NA 626. 2 Phas… 30 28/07/20… BGE005 551. NA 731. 866. 827. 802. 852. 0 1.75 2.82
 5 AGE007 1 0 NA 743. 1 Phas… 34 25/02/20… NA 621. 748. 907. 955. 879. 772. 998. 1.41 1.14 4.60
 6 AGE007 1 0 NA 743. 2 Phas… 39 21/08/20… CGE007 709. NA 876. 900. 930. 829. 1037. 0 0 0
 7 AGE012 2 1 NA 822. 1 Phas… 35 15/04/20… NA 668. 723. 821. 950. 898. 753. 1049. 2.14 1.72 1.73
 8 AGE012 2 1 NA 822. 2 Phas… 38 27/05/20… BGE012 767. NA 911. 1031. 999. 948. 1052. 2.86 1.13 0
 9 AGE022 1 1 NA 615. 1 Phas… 19 23/04/20… NA 554. 679. 829. 942. 864. 786. 952. 0.699 1.69 1.69
10 AGE022 1 1 NA 615. 2 Phas… 22 31/05/20… BGE022 596. NA 755. 835. 801. 713. 895. 1.41 1.14 3.41
11 AGE026 2 1 NA 683. 1 Phas… 19 20/05/20… NA 609. 659. 716. 898. 877. 877. 876. 2.86 1.70 2.26
12 AGE026 2 1 NA 683. 2 Phas… 22 3/03/2016 BGE026 622. NA 715. 824. 830. 829. 832. 0.699 1.13 0.562