Question

假设我们有一个结构列表graph_node *：

struct graph_node{
  int from;
  int to;
  float prob;
}

现在，假设在列表中重复了几个.from和.to元素。例如：我们可以有两个节点具有相同的.from和.to属性（5,6，.6）和（5,6，.5）。

我想知道的是，如果有一种推力或cuda的方法来合并这两个元素并添加它们的概率（对于前面的例子有5,6和1.1）或者只是添加对象的所有概率包含相同的.from和.to，并为具有该键的所有元素分配增加的概率（5,6,1.1和5,6,1.1）。

Answer 1

如评论中@talonmies所示，可以使用thrust::reduce_by_key完成此操作。首先，必须对graph_node列表进行排序，以便将相同的节点组合在一起。然后reduce_by_key可用于对节点进行求和。

这可能有很多变化，取决于您实际上是如何愿意存储数据，以及是否要允许修改输入列表等。我将假设数据必须存储在矢量中。您定义的结构，输出列表必须与输入列表分开。

在这种情况下，我们需要为排序操作提供一个仿函数，以指示如何按排序顺序排列graph_node。我们还需要为reduce_by_key操作提供相等测试和求和运算符。这是一个完整的例子，展示了一种可能的方法：

$ cat t13.cu

#include <thrust/device_vector.h>
#include <thrust/sort.h>
#include <thrust/reduce.h>
#include <iostream>
#include <vector>
#include <thrust/host_vector.h>

struct graph_node{
  int from;
  int to;
  float prob;
};

struct my_graph_node_sort
{
  __host__ __device__
  bool operator()(graph_node &a, graph_node &b){
    if (a.from > b.from) return false;
    if (a.from < b.from) return true;
    return (a.to <= b.to);
  }
};

struct my_graph_node_equality
{
  __host__ __device__
  bool operator()(const graph_node &a, const graph_node &b){
    return ((a.from == b.from) && (a.to == b.to));
  }
};

struct my_graph_node_reduce
{
  __host__ __device__
  graph_node operator()(const graph_node &a, const graph_node &b){
    graph_node t = a;
    t.prob += b.prob;
    return t;
  }
};


int main(){

  std::vector<graph_node> h_n = { {0,1,0.1f}, {0,2,0.1f},{5,6,0.6f},{1,2,0.1f},{2,5,0.1f},{5,6, 0.5f}};
  thrust::device_vector<graph_node> d_n = h_n;
  thrust::device_vector<graph_node> d_kr(d_n.size());
  thrust::device_vector<graph_node> d_vr(d_n.size());
  thrust::sort(d_n.begin(), d_n.end(), my_graph_node_sort());
  int rs = (thrust::reduce_by_key(d_n.begin(), d_n.end(), d_n.begin(),d_kr.begin(), d_vr.begin(), my_graph_node_equality(), my_graph_node_reduce())).first - d_kr.begin();
  thrust::host_vector<graph_node> h_r = d_vr;
  std::cout << "from,to,prob" << std::endl;
  for (int i = 0; i < rs; i++)
    std::cout << h_r[i].from << "," << h_r[i].to << "," << h_r[i].prob << std::endl;
}
$ nvcc -std=c++11 -arch=sm_35 -o t13 t13.cu
$ ./t13
from,to,prob
0,1,0.1
0,2,0.1
1,2,0.1
2,5,0.1
5,6,1.1
$

与许多CUDA代码一样，AoS格式的数据排列可能不是处理效率的最佳选择。将数据存储重新排列为.from，.to和.prob元素的单独向量集可能会提高效率，并简化代码。

合并列表与CUDA中的重复键

1 个答案: