在python中构建列表时出错

时间:2017-03-20 06:20:11

标签: python list numpy nested-lists

我'我正在尝试在python中构建一个列表。该列表包含列表。单个内部列表包括音频信号的各种功能,如标准偏差,平均频率等。但是当我打印外部列表时,我得到一个空白列表。这是我的代码。

from scipy.io.wavfile import read  # to read wavfiles
import matplotlib.pyplot as plotter
from sklearn.tree import DecisionTreeClassifier as dtc
import numpy as np
import os
import scipy
import math

np.set_printoptions(precision=4)

def __init__(self, criterion="gini", splitter="best", max_depth=None, min_samples_split=10, min_samples_leaf=1, min_weight_fraction_leaf=0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_split=1e-7, class_weight=None, presort=False):

        super(DecisionTreeClassifier, self).__init__(criterion=criterion, splitter=splitter, max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, class_weight=class_weight, random_state=random_state, min_impurity_split=min_impurity_split, presort=presort)


fList = []  #feature list
mfList = [] #main feature list
labels = ["angry", "angry", "angry", "angry", "angry", "angry", "fear", "fear", "happy", "happy", "happy", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad", "sad"]
label = [1,2,3,4,5,6,7,8,9,10]

def stddev(lst,mf):
    sum1 = 0
    len1 = len(lst)-1
    for i in range(len(lst)):
        sum1 += pow((lst[i]-mf),2)
    sd = np.sqrt(sum1/len1)
    fList.append(sd)

def find_iqr(num,num_array=[],*args):
    num_array.sort()
    l=int((int(num)+1)/4)
    m=int((int(num)+1)/2)
    med=num_array[m]
    u=int(3*(int(num)+1)/4)
    fList.append(num_array[l])  #first quantile
    fList.append(med)   #median
    fList.append(num_array[u])  #third quantile
    fList.append(num_array[u]-num_array[l]) #inter quantile range


def build(path1):
    dirlist=os.listdir(path1)
    n=1
    mf=0
    for name in dirlist:
        path=path1+name
        print ("File ",n)
        fs, x = read(path) #fs will have sampling rate and x will have sample #
        #print ("The sampling rate: ",fs)
        #print ("Size: ",x.size)
        #print ("Duration: ",x.size/float(fs),"s")

        '''
        plotter.plot(x)
        plotter.show() #x-axis is in samples 
        t = np.arange(x.size)/float(fs) #creating an array with values as time w.r.t samples
        plotter.plot(t)   #plot t w.r.t x
        plotter.show()
        y = x[100:600]
        plotter.plot(y)
        plotter.show()  # showing close-up of samples 
        '''
        j=0     
        med=0
        for i in x:
            j=j+1
            mf=mf+i
        mf=mf/j
        fList.append(np.max(abs(x)))    #amplitude
        fList.append(mf)    #mean frequency
        find_iqr(j,x)
        fList.append((3*med)-(2*mf))    #mode
        stddev(x,mf)
        #fftc = np.fft.rfft(x).tolist()
        #mr = 20*scipy.log10(scipy.absolute(x)).tolist()
        #fList.append(fftc) #1D dft
        #fList.append(mr)   #magnitude response
        mfList.append(fList)
        fList[:] = []
        n=n+1

path1 = '/home/vishnu/Desktop/Trainingsamples/'
path2 = '/home/vishnu/Desktop/TestSamples/'
clf = dtc() # this class is used to make decision tree
build(path1)
print(mfList)
clf.fit(mfList,label)
mfList[:] = []  #clear mflist
tlist = build(path2)
res = clf.predict(tlist)
print(res)

以下是我的输出屏幕:

('File ', 1)
SA1.py:50: RuntimeWarning: invalid value encountered in sqrt
  sd = np.sqrt(sum1/len1)
('File ', 2)
('File ', 3)
('File ', 4)
('File ', 5)
('File ', 6)
('File ', 7)
('File ', 8)
('File ', 9)
('File ', 10)
[[], [], [], [], [], [], [], [], [], []]
Traceback (most recent call last):
  File "SA1.py", line 111, in <module>
    clf.fit(mfList,label)
  File "/home/vishnu/.local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 739, in fit
    X_idx_sorted=X_idx_sorted)
  File "/home/vishnu/.local/lib/python2.7/site-packages/sklearn/tree/tree.py", line 122, in fit
    X = check_array(X, dtype=DTYPE, accept_sparse="csc")
  File "/home/vishnu/.local/lib/python2.7/site-packages/sklearn/utils/validation.py", line 424, in check_array
    context))
ValueError: Found array with 0 feature(s) (shape=(10, 0)) while a minimum of 1 is required.

此处可以看到行print(mfList)打印输出[[], [], [], [], [], [], [], [], [], []]。这是一个空列表列表。我的错在哪里?请指导。

2 个答案:

答案 0 :(得分:0)

问题来自最后调用的fList[:] = []。我做了一个小例子来测试它:

l = []
ml = []

def f(x):
    for i in range(0, x):
        l.append(i)
    ml.append(l)
    l[:] = []

f(10)
f(5)
print(ml)

这会打印包含两个空列表的ml

>>> [[], []]

如果我删除l[:]=[]并将其替换为l = [],我会在ml内找到包含其内容的两个列表:

>>> [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [0, 1, 2, 3, 4]]

fList[:]=[]表示&#34;将fList内的所有项目替换为空项目&#34;。您正在使用此处的参考资料,只是因为您已将fList附加到该范围内的mfList并不意味着您仍然无法通过fList访问这些元素。因此,如果您将fList中的元素替换为新的元素(在本例中为[]),它也会影响mfList

答案 1 :(得分:0)

你在第103行有一个错误

eval

会发生什么?您可以通过 mfList.append(fList)将fList附加到mfList。 fList 通过引用调用。因此,您附加指向值的指针,而不是应对值。如果您运行上面的代码片段,则删除指针引用的这些值。因此mfList也是空的。

相反,您可以使用以下剪辑:

test_df.groupby('group').agg('sum').eval('cnt / total')

group
A    0.555556
B    0.333333
dtype: float64