Question

我有一个庞大的二进制数据文件（8GB），其内容按稀疏矩阵的形式（非零数据、行索引、列索引）存储。出于速度考虑，我想用C来读取/导入它；然后，一旦矩阵构造完成，出于显示的需要，我想通过ctypes接口将其导入Python。目前，我已经用纯Python写完了全部代码，并且可以正常工作，但我不喜欢这种方式，因为它消耗大量内存。这就是为什么我想分两步（先用C读取，再导入Python）来解决这个问题。

有人知道如何在C语言中构建稀疏矩阵，并通过ctypes将其以Python稀疏矩阵格式（例如SciPy的稀疏矩阵）导入吗？

谢谢。

下面，您将找到我的纯Python代码以读取二进制文件并导入数据，以将其作为稀疏矩阵返回。

<!-- language: lang-py -->
def _build_csc_block(rows, cols, data, n_rows, n_cols):
    """Assemble one CSC block from lists of per-run triplet chunks.

    ``rows``, ``cols`` and ``data`` are lists of 1-D NumPy arrays whose
    concatenations form matching (row, column, value) triplets. Building
    through the triplet form keeps the semantics of the previous
    implementation: SciPy sums duplicate entries and sorts the indices.
    """
    if data:
        values = np.concatenate(data).astype(np.float32, copy=False)
        row_idx = np.concatenate(rows)
        col_idx = np.concatenate(cols)
    else:
        values = np.zeros(0, dtype=np.float32)
        row_idx = np.zeros(0, dtype=np.int64)
        col_idx = np.zeros(0, dtype=np.int64)
    return sp.csc_matrix((values, (row_idx, col_idx)),
                         shape=(n_rows, n_cols), dtype=np.float32)


def Sparse_read(binFileName, DoseGridSize, NbrSpots):
    """Read a binary beamlet file into a SciPy CSC sparse matrix.

    File layout, as consumed by this reader (all fields are 4 bytes)::

        repeated NbrSpots times:
            int32   NonZeroVoxels   number of stored values for the spot
            int32   BeamID
            int32   LayerID
            float32 xcoord
            float32 ycoord
            repeated until the run lengths add up to >= NonZeroVoxels
            (at least one run is always read):
                int32   NbrContinuousValues   length of the run
                int32   FirstIndex            row index of the first value
                float32 * NbrContinuousValues values of rows
                                              FirstIndex, FirstIndex+1, ...

    Parameters:
        binFileName: path of the binary file.
        DoseGridSize: sequence whose first three entries are multiplied to
            give the number of matrix rows (voxels).
        NbrSpots: number of spots to read; each spot is one matrix column.

    Returns:
        A ``scipy.sparse`` CSC matrix of dtype float32 and shape
        ``(NbrVoxels, NbrSpots)``.

    Raises:
        IOError/OSError: the file cannot be opened (a message is printed
            first, then the error is re-raised instead of failing later on
            an unbound file handle).
        struct.error: the file ends before all announced data was read.
        ValueError: a run announces a negative length.

    NOTE(review): the floats were always decoded as little-endian; the
    integers are now decoded as little-endian too (previously native byte
    order), which is identical on little-endian hosts. Confirm against the
    program that writes the file.
    """
    NbrVoxels = DoseGridSize[0] * DoseGridSize[1] * DoseGridSize[2]

    spot_header = struct.Struct('<iiiff')
    run_header = struct.Struct('<ii')
    # Pending triplets are converted to a compact CSC block once they exceed
    # this many values, to bound the transient memory of the triplet form.
    max_pending_values = 10000000

    try:
        fid = open(binFileName, 'rb')
    except IOError:
        print('Unable to open file ', binFileName)
        raise

    blocks = []            # finished CSC blocks, in column order
    pending_rows = []      # triplet chunks not yet turned into a block
    pending_cols = []
    pending_data = []
    pending_count = 0
    first_pending_spot = 0  # global column index of local column 0

    with fid:  # guarantees the file is closed even when parsing fails
        for i in range(NbrSpots):
            (NonZeroVoxels, BeamID, LayerID,
             xcoord, ycoord) = spot_header.unpack(fid.read(spot_header.size))
            print("Spot " + str(i) + ": BeamID=" + str(BeamID) + " LayerID=" + str(LayerID) + " Position=(" + str(xcoord) + ";" + str(ycoord) + ")")

            values_in_spot = 0
            while True:
                NbrContinuousValues, FirstIndex = run_header.unpack(
                    fid.read(run_header.size))
                if NbrContinuousValues < 0:
                    raise ValueError(
                        "negative run length %d in spot %d"
                        % (NbrContinuousValues, i))

                if NbrContinuousValues > 0:
                    # Read the whole run in one call instead of one
                    # struct.unpack per value.
                    raw = fid.read(4 * NbrContinuousValues)
                    if len(raw) != 4 * NbrContinuousValues:
                        raise struct.error(
                            "unexpected end of file while reading spot %d" % i)
                    pending_data.append(np.frombuffer(raw, dtype='<f4'))
                    pending_rows.append(
                        np.arange(FirstIndex,
                                  FirstIndex + NbrContinuousValues,
                                  dtype=np.int64))
                    values_in_spot += NbrContinuousValues

                if values_in_spot >= NonZeroVoxels:
                    break

            if values_in_spot:
                # One column index per value actually read, so the triplet
                # arrays always have matching lengths.
                pending_cols.append(
                    np.full(values_in_spot, i - first_pending_spot,
                            dtype=np.int64))
                pending_count += values_in_spot

            if pending_count > max_pending_values:
                blocks.append(_build_csc_block(
                    pending_rows, pending_cols, pending_data,
                    NbrVoxels, i - first_pending_spot + 1))
                pending_rows = []
                pending_cols = []
                pending_data = []
                pending_count = 0
                first_pending_spot = i + 1

    # Columns read since the last flush (possibly empty columns).
    if first_pending_spot < NbrSpots:
        blocks.append(_build_csc_block(
            pending_rows, pending_cols, pending_data,
            NbrVoxels, NbrSpots - first_pending_spot))

    if not blocks:
        return sp.csc_matrix((NbrVoxels, 0), dtype=np.float32)
    # Stack once at the end; stacking after every flush recopies the whole
    # accumulated matrix each time.
    return sp.hstack(blocks, format='csc')

将C稀疏矩阵转换为python稀疏矩阵

0 个答案: