我写了下面这个函数,用来计算遇到某个 type(pandas DataFrame 中的一列)的概率。我想知道是否有更高效的方式来构造参数,使调用时不必传入整个查询,而只需传入类型名称?

def P(type = ""):
# Build a per-type count table in one chain:
#   1. count the non-null cells of every column within each 'type' group,
#   2. discard the 'first_word' column, which is not wanted here,
#   3. turn the 'type' index back into an ordinary column,
#   4. relabel the remaining 'name' column as 'count'.
type_counts = (
    data.groupby('type')
    .count()
    .drop('first_word', axis=1)
    .reset_index()
    .rename(columns={'name': 'count'})
)
# Evaluate P once per type, in the order company, drug, movie, person,
# place, passing the rows of `data` selected by each query expression.
prob_company, prob_drug, prob_movie, prob_person, prob_place = (
    P(data.query(expression))
    for expression in (
        'type =="company"',
        'type =="drug"',
        'type == "movie"',
        'type == "person"',
        'type == "place"',
    )
)
# Combined value over all five types (a bare expression, so it is only
# displayed when run interactively).
prob_company + prob_drug + prob_movie + prob_person + prob_place