有效地行标准化矩阵

时间:2011-12-02 15:54:47

标签: python matrix numpy scipy

我需要一种有效的方法来对稀疏矩阵进行行标准化。

鉴于

W = matrix([[0, 1, 0, 1, 0, 0, 0, 0, 0],
            [1, 0, 1, 0, 1, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 1, 0, 0, 0],
            [1, 0, 0, 0, 1, 0, 1, 0, 0],
            [0, 1, 0, 1, 0, 1, 0, 1, 0],
            [0, 0, 1, 0, 1, 0, 0, 0, 1],
            [0, 0, 0, 1, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 1, 0, 1, 0, 1],
            [0, 0, 0, 0, 0, 1, 0, 1, 0]])
row_sums = W.sum(1)

我需要制作......

W2 = matrix([[0.  , 0.5 , 0.  , 0.5 , 0.  , 0.  , 0.  , 0.  , 0.  ],
             [0.33, 0.  , 0.33, 0.  , 0.33, 0.  , 0.  , 0.  , 0.  ],
             [0.  , 0.5 , 0.  , 0.  , 0.  , 0.5 , 0.  , 0.  , 0.  ],
             [0.33, 0.  , 0.  , 0.  , 0.33, 0.  , 0.33, 0.  , 0.  ],
             [0.  , 0.25, 0.  , 0.25, 0.  , 0.25, 0.  , 0.25, 0.  ],
             [0.  , 0.  , 0.33, 0.  , 0.33, 0.  , 0.  , 0.  , 0.33],
             [0.  , 0.  , 0.  , 0.5 , 0.  , 0.  , 0.  , 0.5 , 0.  ],
             [0.  , 0.  , 0.  , 0.  , 0.33, 0.  , 0.33, 0.  , 0.33],
             [0.  , 0.  , 0.  , 0.  , 0.  , 0.5 , 0.  , 0.5 , 0.  ]]) 

其中,

for i in range(9):
    W2[i] = W[i]/row_sums[i]

我想找到一种方法来做这个没有循环(即Vectorized)和使用Scipy.sparse矩阵。 W可以在10mil x 10mil时大到。

1 个答案:

答案 0 :(得分:6)

有一点矩阵代数

>>> cc
<9x9 sparse matrix of type '<type 'numpy.int32'>'
    with 24 stored elements in Compressed Sparse Row format>
>>> ccd = sparse.spdiags(1./cc.sum(1).T, 0, *cc.shape)
>>> ccn = ccd * cc
>>> np.round(ccn.todense(), 2)
array([[ 0.  ,  0.5 ,  0.  ,  0.5 ,  0.  ,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.33,  0.  ,  0.33,  0.  ,  0.33,  0.  ,  0.  ,  0.  ,  0.  ],
       [ 0.  ,  0.5 ,  0.  ,  0.  ,  0.  ,  0.5 ,  0.  ,  0.  ,  0.  ],
       [ 0.33,  0.  ,  0.  ,  0.  ,  0.33,  0.  ,  0.33,  0.  ,  0.  ],
       [ 0.  ,  0.25,  0.  ,  0.25,  0.  ,  0.25,  0.  ,  0.25,  0.  ],
       [ 0.  ,  0.  ,  0.33,  0.  ,  0.33,  0.  ,  0.  ,  0.  ,  0.33],
       [ 0.  ,  0.  ,  0.  ,  0.5 ,  0.  ,  0.  ,  0.  ,  0.5 ,  0.  ],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.33,  0.  ,  0.33,  0.  ,  0.33],
       [ 0.  ,  0.  ,  0.  ,  0.  ,  0.  ,  0.5 ,  0.  ,  0.5 ,  0.  ]])
>>> ccn
<9x9 sparse matrix of type '<type 'numpy.float64'>'
    with 24 stored elements in Compressed Sparse Row format>