当具有相同的项目时,支持两个不同项目集之间的减法

时间:2017-12-07 20:19:50

标签: r transactions apriori arules market-basket-analysis

使用arules,我有两个项目集,并且我想在具有相同项目时在两个不同项目集之间进行减法。

> inspect(fsets_model_test)
    items                        support    count
[1] {SURFSKINTEMP=6,MODIS_LST=1} 0.01235235 663  
[2] {TOTCO=13,MODIS_LST=1}       0.01373104 737  
[3] {TOTCO=6,MODIS_LST=1}        0.01393598 748  
[4] {TOTO3=15,MODIS_LST=1}       0.01265045 679  
[5] {TOTH2OVAP=6,MODIS_LST=1}    0.01548236 831  
[6] {TOTH2OVAP=1,MODIS_LST=1}    0.01565004 840  
> inspect(fsets_nonsesmic_test)
    items                                   support    count
[1] {TOTCO=6,MODIS_LST=1}                   0.02192761 10013
[2] {TOTCO=13,MODIS_LST=1}                  0.02261524 10327
[3] {TOTO3=15,MODIS_LST=1}                  0.02432556 11108
[4] {SURFAIRTEMP=3,TOTH2OVAP=1,MODIS_LST=1} 0.01772735  8095
[5] {TOTH2OVAP=1,MODIS_LST=1}               0.02873605 13122
[6] {SURFAIRTEMP=3,TOTH2OVAP=1}             0.01856828  8479

您可以看到项目集fsets_model_test 项目集fsets_nonsesmic_test 具有相同的项 {TOTO3 = 15,MODIS_LST = 1}

我想要做的是减去两个项目集之间的支持,在上面的例子中是 0.02432556 - 0.01265045 = 0.01167511,然后获取新的项目集。

如何在arules中实现这一点,谢谢

以下是示例项目集

一个项目集

fsets_model_test <- new("itemsets"
    , items = new("itemMatrix"
    , data = new("ngCMatrix"
    , i = c(5L, 121L, 74L, 121L, 67L, 121L, 59L, 121L, 33L, 121L, 28L, 
121L)
    , p = c(0L, 2L, 4L, 6L, 8L, 10L, 12L)
    , Dim = c(125L, 6L)
    , Dimnames = list(NULL, NULL)
    , factors = list()
)
    , itemInfo = structure(list(labels = c("SURFSKINTEMP=1", "SURFSKINTEMP=2", 
"SURFSKINTEMP=3", "SURFSKINTEMP=4", "SURFSKINTEMP=5", "SURFSKINTEMP=6", 
"SURFSKINTEMP=7", "SURFSKINTEMP=8", "SURFSKINTEMP=9", "SURFSKINTEMP=10", 
"SURFSKINTEMP=11", "SURFSKINTEMP=12", "SURFSKINTEMP=13", "SURFSKINTEMP=14", 
"SURFSKINTEMP=15", "SURFSKINTEMP=16", "SURFAIRTEMP=1", "SURFAIRTEMP=2", 
"SURFAIRTEMP=3", "SURFAIRTEMP=4", "SURFAIRTEMP=5", "SURFAIRTEMP=6", 
"SURFAIRTEMP=7", "SURFAIRTEMP=8", "SURFAIRTEMP=9", "SURFAIRTEMP=10", 
"SURFAIRTEMP=11", "SURFAIRTEMP=12", "TOTH2OVAP=1", "TOTH2OVAP=2", 
"TOTH2OVAP=3", "TOTH2OVAP=4", "TOTH2OVAP=5", "TOTH2OVAP=6", "TOTH2OVAP=7", 
"TOTH2OVAP=8", "TOTH2OVAP=9", "TOTH2OVAP=10", "TOTH2OVAP=11", 
"TOTH2OVAP=12", "TOTH2OVAP=13", "TOTH2OVAP=14", "TOTH2OVAP=15", 
"TOTH2OVAP=16", "TOTH2OVAP=17", "TOTO3=1", "TOTO3=2", "TOTO3=3", 
"TOTO3=4", "TOTO3=5", "TOTO3=6", "TOTO3=7", "TOTO3=8", "TOTO3=9", 
"TOTO3=10", "TOTO3=11", "TOTO3=12", "TOTO3=13", "TOTO3=14", "TOTO3=15", 
"TOTO3=16", "TOTO3=17", "TOTCO=1", "TOTCO=2", "TOTCO=3", "TOTCO=4", 
"TOTCO=5", "TOTCO=6", "TOTCO=7", "TOTCO=8", "TOTCO=9", "TOTCO=10", 
"TOTCO=11", "TOTCO=12", "TOTCO=13", "TOTCO=14", "TOTCO=15", "TOTCH4=1", 
"TOTCH4=2", "TOTCH4=3", "TOTCH4=4", "TOTCH4=5", "TOTCH4=6", "TOTCH4=7", 
"TOTCH4=8", "TOTCH4=9", "TOTCH4=10", "TOTCH4=11", "TOTCH4=12", 
"TOTCH4=13", "TOTCH4=14", "OLR_ARIS=1", "OLR_ARIS=2", "OLR_ARIS=3", 
"OLR_ARIS=4", "OLR_ARIS=5", "OLR_ARIS=6", "OLR_ARIS=7", "OLR_ARIS=8", 
"OLR_ARIS=9", "OLR_ARIS=10", "CLROLR_ARIS=1", "CLROLR_ARIS=2", 
"CLROLR_ARIS=3", "CLROLR_ARIS=4", "CLROLR_ARIS=5", "CLROLR_ARIS=6", 
"CLROLR_ARIS=7", "CLROLR_ARIS=8", "CLROLR_ARIS=9", "CLROLR_ARIS=10", 
"OLR_NOAA=1", "OLR_NOAA=2", "OLR_NOAA=3", "OLR_NOAA=4", "OLR_NOAA=5", 
"OLR_NOAA=6", "OLR_NOAA=7", "OLR_NOAA=8", "OLR_NOAA=9", "OLR_NOAA=10", 
"MODIS_LST=1", "MODIS_LST=2", "MODIS_LST=3", "MODIS_LST=4"), 
    variables = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
    9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 8L, 8L, 
    8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 7L, 7L, 
    7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 
    2L), .Label = c("CLROLR_ARIS", "MODIS_LST", "OLR_ARIS", "OLR_NOAA", 
    "SURFAIRTEMP", "SURFSKINTEMP", "TOTCH4", "TOTCO", "TOTH2OVAP", 
    "TOTO3"), class = "factor"), levels = structure(c(1L, 10L, 
    11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L, 
    8L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 
    1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 
    6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 
    2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L, 
    15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 10L, 11L, 12L, 
    13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 1L, 10L, 11L, 
    12L, 13L, 14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L, 
    14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 
    16L, 17L, 2L, 1L, 10L, 11L, 12L), .Label = c("1", "10", "11", 
    "12", "13", "14", "15", "16", "17", "2", "3", "4", "5", "6", 
    "7", "8", "9"), class = "factor")), .Names = c("labels", 
"variables", "levels"), row.names = c(NA, -125L), class = "data.frame")
    , itemsetInfo = structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame")
)
    , tidLists = NULL
    , quality = structure(list(support = c(0.0123523493684093, 0.0137310429630734, 
0.0139359839028207, 0.0126504452807691, 0.0154823564481872, 0.0156500353988896
), count = c(663, 737, 748, 679, 831, 840)), .Names = c("support", 
"count"), row.names = c(NA, 6L), class = "data.frame")
    , info = structure(list(data = model_data_tr, ntransactions = 53674L, 
    support = 0.01), .Names = c("data", "ntransactions", "support"
))
)

另一个项目集是:

fsets_nonsesmic_test <- new("itemsets"
    , items = new("itemMatrix"
    , data = new("ngCMatrix"
    , i = c(67L, 121L, 74L, 121L, 59L, 121L, 18L, 28L, 121L, 28L, 121L, 
18L, 28L)
    , p = c(0L, 2L, 4L, 6L, 9L, 11L, 13L)
    , Dim = c(125L, 6L)
    , Dimnames = list(NULL, NULL)
    , factors = list()
)
    , itemInfo = structure(list(labels = c("SURFSKINTEMP=1", "SURFSKINTEMP=2", 
"SURFSKINTEMP=3", "SURFSKINTEMP=4", "SURFSKINTEMP=5", "SURFSKINTEMP=6", 
"SURFSKINTEMP=7", "SURFSKINTEMP=8", "SURFSKINTEMP=9", "SURFSKINTEMP=10", 
"SURFSKINTEMP=11", "SURFSKINTEMP=12", "SURFSKINTEMP=13", "SURFSKINTEMP=14", 
"SURFSKINTEMP=15", "SURFSKINTEMP=16", "SURFAIRTEMP=1", "SURFAIRTEMP=2", 
"SURFAIRTEMP=3", "SURFAIRTEMP=4", "SURFAIRTEMP=5", "SURFAIRTEMP=6", 
"SURFAIRTEMP=7", "SURFAIRTEMP=8", "SURFAIRTEMP=9", "SURFAIRTEMP=10", 
"SURFAIRTEMP=11", "SURFAIRTEMP=12", "TOTH2OVAP=1", "TOTH2OVAP=2", 
"TOTH2OVAP=3", "TOTH2OVAP=4", "TOTH2OVAP=5", "TOTH2OVAP=6", "TOTH2OVAP=7", 
"TOTH2OVAP=8", "TOTH2OVAP=9", "TOTH2OVAP=10", "TOTH2OVAP=11", 
"TOTH2OVAP=12", "TOTH2OVAP=13", "TOTH2OVAP=14", "TOTH2OVAP=15", 
"TOTH2OVAP=16", "TOTH2OVAP=17", "TOTO3=1", "TOTO3=2", "TOTO3=3", 
"TOTO3=4", "TOTO3=5", "TOTO3=6", "TOTO3=7", "TOTO3=8", "TOTO3=9", 
"TOTO3=10", "TOTO3=11", "TOTO3=12", "TOTO3=13", "TOTO3=14", "TOTO3=15", 
"TOTO3=16", "TOTO3=17", "TOTCO=1", "TOTCO=2", "TOTCO=3", "TOTCO=4", 
"TOTCO=5", "TOTCO=6", "TOTCO=7", "TOTCO=8", "TOTCO=9", "TOTCO=10", 
"TOTCO=11", "TOTCO=12", "TOTCO=13", "TOTCO=14", "TOTCO=15", "TOTCH4=1", 
"TOTCH4=2", "TOTCH4=3", "TOTCH4=4", "TOTCH4=5", "TOTCH4=6", "TOTCH4=7", 
"TOTCH4=8", "TOTCH4=9", "TOTCH4=10", "TOTCH4=11", "TOTCH4=12", 
"TOTCH4=13", "TOTCH4=14", "OLR_ARIS=1", "OLR_ARIS=2", "OLR_ARIS=3", 
"OLR_ARIS=4", "OLR_ARIS=5", "OLR_ARIS=6", "OLR_ARIS=7", "OLR_ARIS=8", 
"OLR_ARIS=9", "OLR_ARIS=10", "CLROLR_ARIS=1", "CLROLR_ARIS=2", 
"CLROLR_ARIS=3", "CLROLR_ARIS=4", "CLROLR_ARIS=5", "CLROLR_ARIS=6", 
"CLROLR_ARIS=7", "CLROLR_ARIS=8", "CLROLR_ARIS=9", "CLROLR_ARIS=10", 
"OLR_NOAA=1", "OLR_NOAA=2", "OLR_NOAA=3", "OLR_NOAA=4", "OLR_NOAA=5", 
"OLR_NOAA=6", "OLR_NOAA=7", "OLR_NOAA=8", "OLR_NOAA=9", "OLR_NOAA=10", 
"MODIS_LST=1", "MODIS_LST=2", "MODIS_LST=3", "MODIS_LST=4"), 
    variables = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
    9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 8L, 8L, 
    8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 7L, 7L, 
    7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 
    2L), .Label = c("CLROLR_ARIS", "MODIS_LST", "OLR_ARIS", "OLR_NOAA", 
    "SURFAIRTEMP", "SURFSKINTEMP", "TOTCH4", "TOTCO", "TOTH2OVAP", 
    "TOTO3"), class = "factor"), levels = structure(c(1L, 10L, 
    11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L, 
    8L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 
    1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 
    6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 
    2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L, 
    15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 10L, 11L, 12L, 
    13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 1L, 10L, 11L, 
    12L, 13L, 14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L, 
    14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 
    16L, 17L, 2L, 1L, 10L, 11L, 12L), .Label = c("1", "10", "11", 
    "12", "13", "14", "15", "16", "17", "2", "3", "4", "5", "6", 
    "7", "8", "9"), class = "factor")), .Names = c("labels", 
"variables", "levels"), row.names = c(NA, -125L), class = "data.frame")
    , itemsetInfo = structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame")
)
    , tidLists = NULL
    , quality = structure(list(support = c(0.0219276058330541, 0.0226152387334415, 
0.024325561329628, 0.0177273513650827, 0.0287360475123675, 0.0185682782241552
), count = c(10013, 10327, 11108, 8095, 13122, 8479)), .Names = c("support", 
"count"), row.names = c(NA, 6L), class = "data.frame")
    , info = structure(list(data = nonsesmic_data_tr, ntransactions = 456639L, 
    support = 0.01), .Names = c("data", "ntransactions", "support"
))
)

1 个答案:

答案 0 :(得分:0)

如果这两个集合来自兼容的事务数据(请参阅? itemCoding),那么您可以使用match查找两个集合中的匹配项集。在那之后,应该很容易减去支持。

相关问题