使用限制来定义唯一身份

时间:2015-05-18 19:55:14

标签: r dataframe dplyr

USER_LACKS_PERMISSIONS : This user lacks sufficient permissions to access this resource

给定以下数据和区间类别:Person <- c(1,2,3) Age <- c(10,22,30) Height <- c(140,185,160) Weight <- c(65, 80, 75) People <- data.frame(Person, Age, Height, Weight) Age_cats_type1 [5-15], [20-30], [35-45] Age_cats_type2 [8-13], [14-16], [18-40] Height_cat_Type1 [100-120], [121-140], [141-186] Height_cat_type2 [110-125], [126-145], [146-190] Weight_cat_Type1 [50-60], [61-78], [79-85] Weight_cat_Type2 [55-75], [76-90], [91-100]。 对于People[1,2](年龄= 10),这适合Age_cats_type1==1和Age_cats_type2==1。 对于People[1,3](身高= 140),这适合Height_cat_Type1==2和Height_cat_Type2==2。

现在,我想为(Age_cats_type1==1)|(Age_cats_type1==2)、(Height_cats_type1==1)|(Height_cats_type1==2)、(Weight_cats_type1==1)|(Weight_cats_type1==2)这些区间的每个唯一可能结果创建一个表格。

所需的输出应该如下面的黄色图像。 上表是每个区间(interval)的可能性摘要。

这与following question密切相关,但当您按照BrodieG概述的此处使用的代码时,错误会在第三次迭代时出现。 在此示例中,我们在data.table

中使用foverlaps。

我使用了以下代码(见下文),

但在PEOPLE6中收到错误。

library(intervals)
# Create the category limits.
#
# Each category set holds three (lower, upper) bound pairs. The helper lays the
# flat vector of bounds out row by row as a two-column matrix and wraps it in
# an `Intervals` object with both endpoints closed over the integers
# (type = "Z").
closed_int_intervals <- function(bounds) {
  Intervals(
    matrix(bounds, ncol = 2, byrow = TRUE),
    closed = c(TRUE, TRUE),
    type = "Z"
  )
}

# The last age band is [35, 45], matching the category list stated in the
# question text (it was previously entered as 35-40).
AGE_cats_type1 <- closed_int_intervals(c(5, 15, 20, 30, 35, 45))
AGE_cats_type2 <- closed_int_intervals(c(8, 13, 14, 16, 18, 40))

Height_cats_type1 <- closed_int_intervals(c(100, 120, 121, 140, 141, 186))
Height_cats_type2 <- closed_int_intervals(c(110, 125, 126, 145, 146, 190))

Weight_cats_type1 <- closed_int_intervals(c(50, 60, 61, 78, 79, 85))
Weight_cats_type2 <- closed_int_intervals(c(55, 75, 76, 90, 91, 100))

# Format the data.
# First iteration: age.
library(data.table)

# Work on a data.table copy of People and mirror Age into A1, so that each
# person is described by the zero-width interval [Age, A1].
PEOPLE1 <- as.data.table(People)
set(PEOPLE1, j = "A1", value = PEOPLE1[["Age"]])

# Lookup tables: the interval bounds (columns X1, X2) plus the category index
# under each scheme. idX numbers the type-1 categories and idY the type-2
# categories; the scheme that does not apply is filled with 0.
I_age_1 <- data.table(data.frame(AGE_cats_type1), idX = 1:3, idY = 0)
I_age_2 <- data.table(data.frame(AGE_cats_type2), idX = 0, idY = 1:3)

# Key both lookup tables on their interval columns for the overlap join.
setkeyv(I_age_1, c("X1", "X2"))
setkeyv(I_age_2, c("X1", "X2"))

# Join against each scheme (unmatched rows dropped) and stack the results.
PEOPLE2 <- data.frame(
  rbind(
    foverlaps(PEOPLE1, I_age_1, by.x = c("Age", "A1"), nomatch = 0),
    foverlaps(PEOPLE1, I_age_2, by.x = c("Age", "A1"), nomatch = 0)
  )
)

####################################################
# Second iteration: height.
# Mirror Height into B1 so each row is the zero-width interval [Height, B1],
# overlap-join it against both sets of height categories, and stack the
# matches.

PEOPLE3 <- data.table(PEOPLE2)
PEOPLE3[, B1 := Height]

# idXa numbers the type-1 height categories and idYa the type-2 categories;
# the scheme that does not apply is filled with 0.
I_height_1 <- data.table(
  cbind(data.frame(Height_cats_type1), idXa = 1:3, idYa = 0)
)
I_height_2 <- data.table(
  cbind(data.frame(Height_cats_type2), idXa = 0, idYa = 1:3)
)

# Key both lookup tables on their interval columns for the overlap join.
setkey(I_height_1, X1, X2)
setkey(I_height_2, X1, X2)

# Fix: the second join uses I_height_2. It previously repeated I_height_1, so
# the type-2 height categories never reached PEOPLE4 and the type-1 matches
# were duplicated.
PEOPLE4 <- data.frame(rbind(
  foverlaps(PEOPLE3, I_height_1, by.x = c("Height", "B1"), nomatch = 0),
  foverlaps(PEOPLE3, I_height_2, by.x = c("Height", "B1"), nomatch = 0)
))

################################################
# Third iteration: weight.

PEOPLE5 <- data.table(PEOPLE4)

# Drop the interval-bound columns left over from the earlier joins. The weight
# lookup tables below also name their bounds X1/X2, and foverlaps prefixes
# clashing columns of its first argument with "i."; with X1, X2, i.X1 and i.X2
# all present already, that renaming would produce duplicate column names.
# NOTE(review): this is the presumed cause of the error reported for PEOPLE6 -
# confirm against the foverlaps documentation.
PEOPLE5 <- PEOPLE5[
  ,
  setdiff(names(PEOPLE5), c("X1", "X2", "i.X1", "i.X2")),
  with = FALSE
]

# Mirror Weight into C1 so each row is the zero-width interval [Weight, C1].
PEOPLE5[, C1 := Weight]

# idXb numbers the type-1 weight categories and idYb the type-2 categories;
# the scheme that does not apply is filled with 0.
I_weight_1 <- data.table(
  cbind(data.frame(Weight_cats_type1), idXb = 1:3, idYb = 0)
)
I_weight_2 <- data.table(
  cbind(data.frame(Weight_cats_type2), idXb = 0, idYb = 1:3)
)

# Key both lookup tables on their interval columns for the overlap join.
setkey(I_weight_1, X1, X2)
setkey(I_weight_2, X1, X2)

# Fix: join on the weight interval [Weight, C1]. The joins previously used
# c("Height", "B1"), which compared heights against the weight categories and
# left C1 unused.
PEOPLE6 <- data.frame(rbind(
  foverlaps(PEOPLE5, I_weight_1, by.x = c("Weight", "C1"), nomatch = 0),
  foverlaps(PEOPLE5, I_weight_2, by.x = c("Weight", "C1"), nomatch = 0)
))

当我查看PEOPLE4时,可以看到idX、idY、idXa和idYa分别是Age_cats_type1、Age_cats_type2、Height_cat_Type1和Height_cat_Type2的取值。

1 个答案:

答案 0 :(得分:0)

您的问题表述有一些不清楚的地方。让我们尝试重建您的意思。

# Rebuild the example data: one value per person for each measurement.
Person <- c(1, 2, 3)
Age <- c(10, 22, 30)
Height <- c(140, 185, 160)
Weight <- c(65, 80, 75)

People <- data.frame(
  Person = Person,
  Age = Age,
  Height = Height,
  Weight = Weight
)

# At this point People prints as:
#   Person Age Height Weight
# 1      1  10    140     65
# 2      2  22    185     80
# 3      3  30    160     75

# One way to represent the ranges: each category set is a list of
# c(lower, upper) pairs, built here by pairing a vector of lower bounds with a
# vector of upper bounds.
Age_cats_type1 <- Map(c, c(5, 20, 35), c(15, 30, 45))
Age_cats_type2 <- Map(c, c(8, 14, 18), c(13, 16, 40))

Height_cat_Type1 <- Map(c, c(100, 121, 141), c(120, 140, 186))
Height_cat_Type2 <- Map(c, c(110, 126, 146), c(125, 145, 190))

Weight_cat_Type1 <- Map(c, c(50, 61, 79), c(60, 78, 85))
Weight_cat_Type2 <- Map(c, c(55, 76, 91), c(75, 90, 100))

# The question was read as saying that People[1, 1] means age == 10. With the
# Person column present, People[1, 1] is Person == 1 instead, so rebuild
# People from the three measurement columns only.
People <- People[c("Age", "Height", "Weight")]

# People[1, 1] is now the age of the first person (10).

# Then you went on to say that you wanted some function that returned Age_cats_type == 1
# Well, it seems that you want the first element of the list of ranges that contains the specified value.
# Then let's build it

#' Test whether a value lies inside a closed range.
#'
#' @param range Vector whose first element is the lower bound and whose second
#'   element is the upper bound.
#' @param value The value to test. The comparison uses scalar `&&`, so a
#'   single value is expected.
#' @return `TRUE` when `range[1] <= value` and `value <= range[2]`, otherwise
#'   `FALSE`.
contains_value <- function(range, value) {
  range[1] <= value && range[2] >= value
}

#' Index of the first range that contains a value.
#'
#' @param ranges_list List of ranges, each passed to `contains_value()` as its
#'   `range` argument.
#' @param value The value to look up.
#' @return The position in `ranges_list` of the first range for which
#'   `contains_value()` is `TRUE`, or `NA` when no range matches (including
#'   when `ranges_list` is empty).
range_index <- function(ranges_list, value) {
  # vapply() always returns a logical vector, even for an empty list; sapply()
  # returns list() there, which which() rejects with an error.
  hits <- vapply(ranges_list, contains_value, logical(1), value)
  which(hits)[1]
}

# Look up the first person's category under each scheme, selecting the
# measurement by column name.
range_index(Age_cats_type1, People[1, "Age"]) # 1
range_index(Age_cats_type2, People[1, "Age"]) # 1
range_index(Height_cat_Type1, People[1, "Height"]) # 2
range_index(Height_cat_Type2, People[1, "Height"]) # 2

# It was not clear which table the question wants to construct, but these
# functions may help to build it.
相关问题