将C稀疏矩阵转换为python稀疏矩阵

时间:2020-03-26 09:54:47

标签: c python-3.x matrix sparse-matrix

我有一个庞大的二进制数据文件(8GB),它以稀疏矩阵的形式(非零数据、行索引、列索引)写入。出于速度原因,我想用C语言读取/导入它。然后,一旦构造好矩阵,出于显示的需要,我想将其导入Python(通过ctypes接口)。目前,我只用Python编写了整个代码,并且可以正常工作。但我不喜欢这种做法,因为它消耗大量内存。这就是为什么我想分两步来解决这个问题。

有人知道如何用C语言编写稀疏矩阵并通过ctypes以稀疏python格式导入它吗?

谢谢。

下面是我的纯Python代码,它读取二进制文件并导入数据,然后将其作为稀疏矩阵返回。

<!-- language: lang-py -->
def _stack_pending(rows, cols, vals, n_rows, n_cols):
    """Build one float32 CSC block of shape (n_rows, n_cols) from buffered COO pieces."""
    if not vals:
        # Only empty columns are pending: np.concatenate rejects an empty list.
        return sp.csc_matrix((n_rows, n_cols), dtype=np.float32)
    data = np.concatenate(vals).astype(np.float32, copy=False)
    row_idx = np.concatenate(rows)
    col_idx = np.concatenate(cols)
    return sp.csc_matrix((data, (row_idx, col_idx)),
                         shape=(n_rows, n_cols), dtype=np.float32)


def Sparse_read(binFileName, DoseGridSize, NbrSpots, max_buffered_values=10000000):
    """Read a run-length encoded binary beamlet file into a sparse matrix.

    File layout, as consumed by this reader (all fields 4 bytes, read as
    little-endian), repeated ``NbrSpots`` times:

    * spot header: int32 ``NonZeroVoxels``, int32 ``BeamID``,
      int32 ``LayerID``, float32 x, float32 y;
    * one or more runs: int32 run length, int32 first row index, then
      ``run length`` float32 values stored at consecutive row indices.
      Runs are read until their cumulative length reaches ``NonZeroVoxels``
      (at least one run header is always read per spot).

    Each spot becomes one column of the result.

    Args:
        binFileName: Path of the binary file to read.
        DoseGridSize: Sequence whose first three items are multiplied
            together to give the number of rows.
        NbrSpots: Number of spots (columns) to read from the file.
        max_buffered_values: Once more than this many values are buffered,
            the buffered columns are converted to a CSC block; this bounds
            the size of the temporary coordinate arrays.

    Returns:
        A ``float32`` CSC sparse matrix of shape
        ``(DoseGridSize[0] * DoseGridSize[1] * DoseGridSize[2], NbrSpots)``.

    Raises:
        IOError: If the file cannot be opened (a message is printed first).
        struct.error: If the file ends before all announced data is read.
        ValueError: If a run length is negative, the runs of a spot exceed
            its announced ``NonZeroVoxels``, or a row index is out of range.
    """
    NbrVoxels = DoseGridSize[0] * DoseGridSize[1] * DoseGridSize[2]

    # The float fields were already read as little-endian; read the integer
    # fields the same way so the parser does not depend on the host CPU.
    spot_header = struct.Struct('<iiiff')
    run_header = struct.Struct('<ii')

    try:
        fid = open(binFileName, 'rb')
    except IOError:
        print('Unable to open file ', binFileName)
        raise

    blocks = []                    # finished CSC column groups, in order
    rows, cols, vals = [], [], []  # numpy pieces of the group being built
    pending_cols = 0               # columns in the group being built
    pending_values = 0             # values in the group being built

    with fid:  # closes the file even if parsing fails
        for i in range(NbrSpots):
            NonZeroVoxels, BeamID, LayerID, xcoord, ycoord = spot_header.unpack(
                fid.read(spot_header.size))
            print("Spot " + str(i) + ": BeamID=" + str(BeamID) + " LayerID=" + str(LayerID) + " Position=(" + str(xcoord) + ";" + str(ycoord) + ")")

            ReadVoxels = 0
            while True:
                NbrContinuousValues, FirstIndex = run_header.unpack(
                    fid.read(run_header.size))
                if NbrContinuousValues < 0:
                    # A negative size would make fid.read() slurp the whole file.
                    raise ValueError(
                        "Spot %d: negative run length %d" % (i, NbrContinuousValues))

                if NbrContinuousValues:
                    # Read the whole run at once instead of one float per call.
                    payload = fid.read(4 * NbrContinuousValues)
                    if len(payload) != 4 * NbrContinuousValues:
                        raise struct.error(
                            "Spot %d: file ended inside a run of %d values"
                            % (i, NbrContinuousValues))
                    vals.append(np.frombuffer(payload, dtype='<f4'))
                    rows.append(np.arange(FirstIndex,
                                          FirstIndex + NbrContinuousValues,
                                          dtype=np.int64))
                    ReadVoxels += NbrContinuousValues

                if ReadVoxels >= NonZeroVoxels:
                    break

            if ReadVoxels > max(NonZeroVoxels, 0):
                raise ValueError(
                    "Spot %d: runs contain %d values but header announced %d"
                    % (i, ReadVoxels, NonZeroVoxels))

            if ReadVoxels:
                # Column index is relative to the group being built.
                cols.append(np.full(ReadVoxels, pending_cols, dtype=np.int64))
            pending_cols += 1
            pending_values += ReadVoxels

            if pending_values > max_buffered_values:
                blocks.append(
                    _stack_pending(rows, cols, vals, NbrVoxels, pending_cols))
                rows, cols, vals = [], [], []
                pending_cols = 0
                pending_values = 0

    # Stack the columns still buffered after the last spot.
    if pending_cols:
        blocks.append(_stack_pending(rows, cols, vals, NbrVoxels, pending_cols))

    if not blocks:
        return sp.csc_matrix((NbrVoxels, 0), dtype=np.float32)

    # One hstack at the end avoids re-copying the growing matrix per flush.
    return sp.hstack(blocks, format='csc')

0 个答案:

没有答案