好吧,我已经对这个问题做了很多研究,我知道NumPy只支持同质(所有元素类型相同)的数组。
我在Python中使用NLTK软件包来处理一些语料库语言学数据,只想创建一个矩阵:第一行是作为"列名"的各个字符串,其余部分是实际的数据值(浮点数)。
到目前为止,我已经创建了两个矩阵,一个存放字符串,另一个存放浮点数,并使用vstack将它们堆叠在一起。一切都很顺利,直到我尝试用NumPy的savetxt()方法保存这个堆叠得到的新"矩阵":它无法写出.csv文件,因为这个矩阵不是同质的,算不上真正的"矩阵"。 FML。
我真的希望能够利用NumPy处理实际数据点的各种强大方法,但我就是没法把一个字符串数组放在矩阵的顶部,再一起写入.csv文件。有什么想法吗?我真的不想再用Python的"列表的列表"方式来处理多维数组了。
以下是代码:
import os.path
import sys
import nltk
from numpy import *
from nltk.corpus.reader import CHILDESCorpusReader
from nltk.probability import ConditionalFreqDist, FreqDist
# Build a table of column names plus float data points and write it to a
# tab-delimited text file: one header row followed by n_rows data rows.
n_rows = 12
n_cols = 19
init_row = 0
init_col = 0

# Column names for the output table; there is one name per data column.
# NOTE(review): "haven't" appears twice -- confirm whether one of them was
# meant to be a different word.
neg_words = ["Age", "MLU", "All Tokens","no","not","don't","can't","won't","isn't","wasn't","wouldn't","shouldn't","couldn't","didn't","haven't","aren't","haven't","hasn't","doesn't"]

# Fixed-width (12-byte) string array holding the column names.
Matrix_headers = array(neg_words, dtype='S12')

# The matrix with the data points (floats), initialised to zero.
Matrix_values = zeros((n_rows, n_cols))

out_name = "/Users/nicholasmoores/Documents/Research/neg_table.csv"

# NumPy arrays are homogeneous: stacking the names on top of the floats would
# coerce every cell to a string, and '%.3e' cannot format strings.  Keep the
# data numeric and let savetxt emit the names as a header line instead.
# comments='' suppresses the default '# ' prefix on the header row.
savetxt(out_name, Matrix_values, fmt='%.3e', delimiter="\t",
        header="\t".join(neg_words), comments='')

# raw_input only exists on Python 2; fall back to input on Python 3.
try:
    _prompt = raw_input
except NameError:
    _prompt = input
_prompt("\n\nPress the enter key to exit.")
答案 0 :(得分:3)
您可以使用结构化数组(structured array)。
例如:
>>> ym = np.zeros(len(neg_words), dtype=[('heads','a14'),('vals','f4',(n_rows,))])
array([('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]),
('', [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])],
dtype=[('heads', 'S14'), ('vals', '<f4', (12,))])
设置标题值:
ym['heads'] = neg_words
要访问标题:
>>> ym['heads']
array(['Age', 'MLU', 'All Tokens', 'no', 'not', "don't", "can't",
"won't", "isn't", "wasn't", "wouldn't", "shouldn't", "couldn't",
"didn't", "haven't", "aren't", "haven't", "hasn't", "doesn't"],
dtype='|S14')
同样,要访问值
ym['vals']