从整洁(tidy)数据框创建节点和边数据框

时间:2018-04-07 11:41:36

标签: r nodes edges networkd3

我有一个这种结构的数据框:

# Example data: each row records that node label cat1 was observed under
# grouping value var1.
df <- data.frame(
  var1 = c(1, 1, 1, 2, 2, 3, 3, 3, 3),
  cat1 = c("A", "B", "D", "B", "C", "D", "E", "B", "A")
)

> df
  var1 cat1
1    1    A
2    1    B
3    1    D
4    2    B
5    2    C
6    3    D
7    3    E
8    3    B
9    3    A

我想用它创建节点(nodes)和边(edges)两个数据框,以便使用 visNetwork 绘制网络图。该网络要展示不同 cat1 值之间的连接数量/强度,连接关系按 var1 的值分组确定。

节点数据框我已经做好了:

# Node table: one row per distinct cat1 label, kept in order of first appearance.
nodes <- data.frame(id = df[["cat1"]][!duplicated(df[["cat1"]])])
> nodes
  id
1  A
2  B
3  D
4  C
5  E

我需要帮助的是如何按以下方式处理 df:对于 df 中 var1 的每个不同取值,找出共享该 var1 值的节点,并统计每对节点共同出现的次数,最终得到如下所示的边数据框。请注意,我不关心边的方向,只需要知道它们相连即可。

> edges
  from to value
1    A  B     2
2    A  D     2
3    A  E     1
4    B  C     1
5    B  D     2
6    B  E     1
7    D  E     1

提前致谢,Nevil

更新:我在这里找到了一个类似的问题,并据此调整了代码,结果已经接近我想要的,但还不完全是……

    # Console transcript of the asker's attempt. For each var1 group (groups
    # with fewer than two rows are filtered out) it lists every pair of cat1
    # values, sorting each pair so that direction does not matter. The pairs
    # are only enumerated per group here; they are not yet counted across
    # groups, which is what the desired edges table needs.
    > df %>% group_by(var1) %>%
             filter(n()>=2) %>% group_by(var1) %>%
             do(data.frame(t(combn(.$cat1, 2,function(x) sort(x))), 
                           stringsAsFactors=FALSE))

# A tibble: 10 x 3
# Groups:   var1 [3]
    var1 X1    X2   
   <dbl> <chr> <chr>
 1    1. A     B    
 2    1. A     D    
 3    1. B     D    
 4    2. B     C    
 5    3. D     E    
 6    3. B     D    
 7    3. A     D    
 8    3. B     E    
 9    3. A     E    
10    3. A     B  

2 个答案:

答案 0 :(得分:1)

我不知道是否已有现成的函数可以完成这项任务。下面是一个详细的步骤。据此,你应该能够定义自己的函数。希望对你有帮助!

library(dplyr)

# Build the var1 x cat1 incidence table from the distinct (var1, cat1) pairs
# only: repeated rows or extra columns in df would otherwise distort the
# counts computed below.
mat <- table(unique(df[, c("var1", "cat1")]))
# Adjacency matrix: entry [i, j] is the number of var1 groups that contain
# both cat1 value i and cat1 value j (crossprod(m) computes t(m) %*% m).
mat <- crossprod(mat)
as.table(mat) # look at your adjacency matrix
# The network is undirected, so keep only the strictly upper triangle; this
# also zeroes the diagonal (a node paired with itself).
mat[lower.tri(mat, diag = TRUE)] <- 0
as.table(mat) # look at the new adjacency matrix

# Reshape to a long edge list; as.data.frame() on a table stores the cell
# counts in a column named Freq, which becomes the edge weight.
edges <- as.data.frame(as.table(mat))
edges <- filter(edges, Freq != 0)
colnames(edges) <- c("from", "to", "value")
edges <- arrange(edges, from)
edges # output

#  from to value
#1    A  B     2
#2    A  D     2
#3    A  E     1
#4    B  C     1
#5    B  D     2
#6    B  E     1
#7    D  E     1

答案 1 :(得分:0)

这里还有其他几种方式……

使用 base R……

# var1 values that occur on more than one row; a group needs at least two
# nodes before any pair can be formed.
values <- unique(df$var1[duplicated(df$var1)])

# One data frame of node pairs per var1 group, stacked into a single edge list.
# Note that value holds the var1 id of the group, not a count.
do.call(rbind,
  lapply(values, function(i) {
    # Sort the labels first so every pair comes out as from < to; the same
    # undirected edge then reads identically in every group.
    nodes <- sort(as.character(df$cat1[df$var1 == i]))
    edges <- combn(nodes, 2)
    data.frame(from = edges[1, ],
               to = edges[2, ],
               value = i,
               stringsAsFactors = FALSE)
  })
)

在tidyverse ......

library(dplyr)
library(tidyr)

# Enumerate the node pairs inside each var1 group; value carries the var1 id
# of the group the pair came from.
df %>%
  group_by(var1) %>%
  # combn(x, 2) needs at least two nodes, so drop single-row groups.
  filter(n() >= 2) %>%
  mutate(cat1 = as.character(cat1)) %>%
  # sort() makes every pair come out as from < to, so the same undirected
  # edge is written the same way in every group.
  summarise(edges = list(
    data.frame(t(combn(sort(cat1), 2)), stringsAsFactors = FALSE)
  )) %>%
  unnest(edges) %>%
  select(from = X1, to = X2, value = var1)

在tidyverse中使用tidyr::complete ...

library(dplyr)
library(tidyr)

# Pair every cat1 value with every other cat1 value of the same var1 group.
df %>%
  group_by(var1) %>%
  # i.cat1 is a copy of cat1 so that complete() can cross the column with
  # itself; mutate() evaluates left to right, so the copy is already character.
  mutate(cat1 = as.character(cat1), i.cat1 = cat1) %>%
  complete(cat1, i.cat1) %>%
  # The grouping is only needed by complete(); drop it so the returned edge
  # list is a plain, ungrouped tibble.
  ungroup() %>%
  # Keep each unordered pair once (from < to); this also removes self-pairs.
  filter(cat1 < i.cat1) %>%
  select(from = cat1, to = i.cat1, value = var1)

在tidyverse中使用tidyr::expand ...

library(dplyr)
library(tidyr)

# Cross the cat1 values of each var1 group with themselves to get all pairs.
df %>%
  group_by(var1) %>%
  mutate(cat1 = as.character(cat1)) %>%
  expand(cat1, to = cat1) %>%
  # The grouping is only needed by expand(); drop it so the returned edge
  # list is a plain, ungrouped tibble.
  ungroup() %>%
  # Keep each unordered pair once (from < to); this also removes self-pairs.
  filter(cat1 < to) %>%
  select(from = cat1, to, value = var1)