FeatureTools TypeError: unhashable type: 'set'

Asked: 2019-07-03 03:30:59

Tags: featuretools feature-engineering

I am trying the following Featuretools code:

features, feature_names = ft.dfs(entityset = es, target_entity = 'demo', 
          agg_primitives = ['count', 'max', 'time_since_first', 'median', 'time_since_last', 'avg_time_between',
                            'sum', 'mean'],
          trans_primitives = ['is_weekend', 'year', 'week', 'divide_by_feature', 'percentile'])
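
(`es` here is an EntitySet built beforehand. A minimal sketch of such a setup, using placeholder data and the Featuretools 0.x-era API just to make the snippet self-contained, would be:)

import featuretools as ft
import pandas as pd

# placeholder data standing in for the real 'demo' table
df = pd.DataFrame({
    "id": [1, 2, 3],
    "value": [10.0, 20.0, 30.0],
    "timestamp": pd.date_range("2019-01-01", periods=3),
})

es = ft.EntitySet(id="demo_es")
es = es.entity_from_dataframe(entity_id="demo", dataframe=df,
                              index="id", time_index="timestamp")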

But I get this error:

TypeError                                 Traceback (most recent call last)
<ipython-input-17-89e925ff895d> in <module>
      3           agg_primitives = ['count', 'max', 'time_since_first', 'median', 'time_since_last', 'avg_time_between',
      4                             'sum', 'mean'],
----> 5           trans_primitives = ['is_weekend', 'year', 'week', 'divide_by_feature', 'percentile'])

~/.local/lib/python3.6/site-packages/featuretools/utils/entry_point.py in function_wrapper(*args, **kwargs)
     44                     ep.on_error(error=e,
     45                                 runtime=runtime)
---> 46                 raise e
     47 
     48             # send return value

~/.local/lib/python3.6/site-packages/featuretools/utils/entry_point.py in function_wrapper(*args, **kwargs)
     36                 # call function
     37                 start = time.time()
---> 38                 return_value = func(*args, **kwargs)
     39                 runtime = time.time() - start
     40             except Exception as e:

~/.local/lib/python3.6/site-packages/featuretools/synthesis/dfs.py in dfs(entities, relationships, entityset, target_entity, cutoff_time, instance_ids, agg_primitives, trans_primitives, groupby_trans_primitives, allowed_paths, max_depth, ignore_entities, ignore_variables, seed_features, drop_contains, drop_exact, where_primitives, max_features, cutoff_time_in_index, save_progress, features_only, training_window, approximate, chunk_size, n_jobs, dask_kwargs, verbose, return_variable_types)
    226                                                   n_jobs=n_jobs,
    227                                                   dask_kwargs=dask_kwargs,
--> 228                                                   verbose=verbose)
    229     return feature_matrix, features

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in calculate_feature_matrix(features, entityset, cutoff_time, instance_ids, entities, relationships, cutoff_time_in_index, training_window, approximate, save_progress, verbose, chunk_size, n_jobs, dask_kwargs)
    265                                                  cutoff_df_time_var=cutoff_df_time_var,
    266                                                  target_time=target_time,
--> 267                                                  pass_columns=pass_columns)
    268 
    269     feature_matrix = pd.concat(feature_matrix)

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in linear_calculate_chunks(chunks, feature_set, approximate, training_window, verbose, save_progress, entityset, no_unapproximated_aggs, cutoff_df_time_var, target_time, pass_columns)
    496                                           no_unapproximated_aggs,
    497                                           cutoff_df_time_var,
--> 498                                           target_time, pass_columns)
    499         feature_matrix.append(_feature_matrix)
    500         # Do a manual garbage collection in case objects from calculate_chunk

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in calculate_chunk(chunk, feature_set, entityset, approximate, training_window, verbose, save_progress, no_unapproximated_aggs, cutoff_df_time_var, target_time, pass_columns)
    341                                            ids,
    342                                            precalculated_features=precalculated_features_trie,
--> 343                                            training_window=window)
    344 
    345             id_name = _feature_matrix.index.name

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/utils.py in wrapped(*args, **kwargs)
     35         def wrapped(*args, **kwargs):
     36             if save_progress is None:
---> 37                 r = method(*args, **kwargs)
     38             else:
     39                 time = args[0].to_pydatetime()

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/calculate_feature_matrix.py in calc_results(time_last, ids, precalculated_features, training_window)
    316                                               ignored=all_approx_feature_set)
    317 
--> 318             matrix = calculator.run(ids)
    319             return matrix
    320 

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/feature_set_calculator.py in run(self, instance_ids)
    100                                             precalculated_trie=self.precalculated_features,
    101                                             filter_variable=target_entity.index,
--> 102                                             filter_values=instance_ids)
    103 
    104         # The dataframe for the target entity should be stored at the root of

~/.local/lib/python3.6/site-packages/featuretools/computational_backends/feature_set_calculator.py in _calculate_features_for_entity(self, entity_id, feature_trie, df_trie, full_entity_df_trie, precalculated_trie, filter_variable, filter_values, parent_data)
    187                                     columns=columns,
    188                                     time_last=self.time_last,
--> 189                                     training_window=self.training_window)
    190 
    191         # Step 2: Add variables to the dataframe linking it to all ancestors.

~/.local/lib/python3.6/site-packages/featuretools/entityset/entity.py in query_by_values(self, instance_vals, variable_id, columns, time_last, training_window)
    271 
    272         if columns is not None:
--> 273             df = df[columns]
    274 
    275         return df

~/.local/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2686             return self._getitem_multilevel(key)
   2687         else:
-> 2688             return self._getitem_column(key)
   2689 
   2690     def _getitem_column(self, key):

~/.local/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key)
   2693         # get column
   2694         if self.columns.is_unique:
-> 2695             return self._get_item_cache(key)
   2696 
   2697         # duplicate columns & possible reduce dimensionality

~/.local/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item)
   2485         """Return the cached item, item represents a label indexer."""
   2486         cache = self._item_cache
-> 2487         res = cache.get(item)
   2488         if res is None:
   2489             values = self._data.get(item)

TypeError: unhashable type: 'set'

I also tried the simplest possible deep feature synthesis (dfs) call, shown below, but I still get the same error:

features, feature_names = ft.dfs(entityset = es, target_entity = 'demo')

I'm not sure why I'm getting this error. Any help or suggestions would be greatly appreciated. Thanks in advance!

2 Answers:

Answer 0 (score: 1)

I found a solution: the version I had installed contained a bug that the Featuretools team has since fixed. Just pip install directly from master:

pip install --upgrade https://github.com/featuretools/featuretools/zipball/master
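
For context on what the bug was doing: the bottom of the traceback shows Featuretools handing pandas a `set` of column names in `df = df[columns]`, which pandas then tries to use as a single hashable column key. With the pandas version in the traceback, that fails exactly as shown; a minimal, Featuretools-free illustration:

import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

df[["a", "b"]]   # list of column names: works
df[{"a", "b"}]   # set of column names: TypeError: unhashable type: 'set'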

Answer 1 (score: 0)

This was a bug that has been fixed and released in Featuretools 0.9.1. If you upgrade to the latest release of Featuretools, the error will go away.
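
Assuming a standard pip install, the upgrade plus a quick sanity check look like:

pip install --upgrade featuretools
python -c "import featuretools as ft; print(ft.__version__)"   # expect 0.9.1 or later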