问题描述
包含数据的CSV文件(bank-full.csv)可通过以下Google云端硬盘(Google Drive)地址获取:https://drive.google.com/drive/folders/1cNtP4iDyGhF620ZbmJdmJWYQrRgJTCum
我的代码如下:
# Question snippet: load the bank marketing CSV, binarize two categorical
# columns with LabelBinarizer, then try to wrap the DataFrame in a tf.data
# Dataset.
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelBinarizer

# Alias so the call below matches the `Dataset.from_tensor_slices(...)` frame
# shown in the traceback.
Dataset = tf.data.Dataset

# The file is semicolon-delimited.
bank_dataframe = pd.read_csv('bank-full.csv', delimiter=';')

features = ['age', 'job', 'marital', 'education', 'default', 'balance',
            'housing', 'loan', 'contact', 'campaign', 'pdays', 'poutcome']
labels = ['y']

# Keep only the feature columns plus the label column.
bank_dataframe = bank_dataframe.filter(features + labels)

encoder = LabelBinarizer()
categorical_features = ['default', 'poutcome']
for feature in categorical_features:
    # The encoder is re-fitted per column; the encoded rows are wrapped in a
    # tuple before being assigned back to the column.
    bank_dataframe[feature] = tuple(encoder.fit_transform(bank_dataframe[feature]))

# NOTE: this is the call that raises the TypeError / ValueError shown in the
# traceback that follows ("Unsupported object type int").
bank_dataset = Dataset.from_tensor_slices(bank_dataframe)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
~\AppData\Roaming\Python\python37\site-packages\tensorflow\python\data\util\structure.py in normalize_element(element)
92 try:
---> 93 spec = type_spec_from_value(t,use_fallback=False)
94 except TypeError:
~\AppData\Roaming\Python\python37\site-packages\tensorflow\python\data\util\structure.py in type_spec_from_value(element,use_fallback)
465 raise TypeError("Could not build a typespec for %r with type %s" %
--> 466 (element,type(element).__name__))
467
TypeError: Could not build a typespec for age job marital education \
0 26 (0,1,0) single (0,0)
1 37 (0,0)
2 31 (1,0)
3 47 (0,0) married (0,0)
4 36 (0,0)
... ... ... ... ...
45206 51 (1,0)
45207 59 (0,0)
45208 29 (0,0)
45209 43 (0,0)
45210 51 (0,0)
default balance housing loan contact campaign pdays \
0 (0,) 2786 (0,) (0,) (1,0) 2 72
1 (0,) 331 (1,0) 3 -1
2 (0,) 92 (1,0) 2 -1
3 (0,) 1568 (1,0) 1 262
4 (0,) 24 (1,0) 1 154
... ... ... ... ... ... ... ...
45206 (0,) 423 (1,0) 1 90
45207 (0,) 3800 (0,0) 1 -1
45208 (0,) 65 (1,0) 14 -1
45209 (0,) 241 (0,0) 10 -1
45210 (0,) 516 (1,0) 1 363
poutcome y
0 (0,0) yes
1 (0,1) no
2 (0,1) no
3 (0,0) yes
4 (1,0) no
... ... ...
45206 (1,0) no
45207 (0,1) no
45208 (0,1) no
45209 (0,1) no
45210 (1,0) no
[45211 rows x 13 columns] with type DataFrame
During handling of the above exception,another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-83-d5b55dc9ba50> in <module>
1 # Convert the DataFrame to a Dataset
2
----> 3 bank_dataset = Dataset.from_tensor_slices(bank_dataframe)
~\AppData\Roaming\Python\python37\site-packages\tensorflow\python\data\ops\dataset_ops.py in from_tensor_slices(tensors)
680 Dataset: A `Dataset`.
681 """
--> 682 return TensorSliceDataset(tensors)
683
684 class _GeneratorState(object):
~\AppData\Roaming\Python\python37\site-packages\tensorflow\python\data\ops\dataset_ops.py in __init__(self,element)
2999 def __init__(self,element):
3000 """See `Dataset.from_tensor_slices()` for details."""
-> 3001 element = structure.normalize_element(element)
3002 batched_spec = structure.type_spec_from_value(element)
3003 self._tensors = structure.to_batched_tensor_list(batched_spec,element)
~\AppData\Roaming\Python\python37\site-packages\tensorflow\python\data\util\structure.py in normalize_element(element)
96 # the value. As a fallback try converting the value to a tensor.
97 normalized_components.append(
---> 98 ops.convert_to_tensor(t,name="component_%d" % i))
99 else:
100 if isinstance(spec,sparse_tensor.SparseTensorSpec):
~\AppData\Roaming\Python\python37\site-packages\tensorflow\python\framework\ops.py in convert_to_tensor(value,dtype,name,as_ref,preferred_dtype,dtype_hint,ctx,accepted_result_types)
1497
1498 if ret is None:
-> 1499 ret = conversion_func(value,dtype=dtype,name=name,as_ref=as_ref)
1500
1501 if ret is NotImplemented:
~\AppData\Roaming\Python\python37\site-packages\tensorflow\python\framework\constant_op.py in _constant_tensor_conversion_function(v,as_ref)
336 as_ref=False):
337 _ = as_ref
--> 338 return constant(v,name=name)
339
340
~\AppData\Roaming\Python\python37\site-packages\tensorflow\python\framework\constant_op.py in constant(value,shape,name)
262 """
263 return _constant_impl(value,verify_shape=False,--> 264 allow_broadcast=True)
265
266
~\AppData\Roaming\Python\python37\site-packages\tensorflow\python\framework\constant_op.py in _constant_impl(value,verify_shape,allow_broadcast)
273 with trace.Trace("tf.constant"):
274 return _constant_eager_impl(ctx,value,verify_shape)
--> 275 return _constant_eager_impl(ctx,verify_shape)
276
277 g = ops.get_default_graph()
~\AppData\Roaming\Python\python37\site-packages\tensorflow\python\framework\constant_op.py in _constant_eager_impl(ctx,verify_shape)
298 def _constant_eager_impl(ctx,verify_shape):
299 """Implementation of eager constant."""
--> 300 t = convert_to_eager_tensor(value,dtype)
301 if shape is None:
302 return t
~\AppData\Roaming\Python\python37\site-packages\tensorflow\python\framework\constant_op.py in convert_to_eager_tensor(value,dtype)
96 dtype = dtypes.as_dtype(dtype).as_datatype_enum
97 ctx.ensure_initialized()
---> 98 return ops.EagerTensor(value,ctx.device_name,dtype)
99
100
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type int).
如果我对错误信息的理解没有错,TensorFlow无法识别其中某些数据类型,因此不能将其转换为张量。请告诉我您的看法:问题是由什么引起的,以及应该如何修正。
解决方法
尝试以下代码:
# Build a tf.data.Dataset from the bank marketing CSV by one-hot encoding every
# non-numeric column with pandas.get_dummies.
import pandas as pd
import tensorflow as tf

# Same semicolon-delimited file the question uses.
bank_dataframe = pd.read_csv('bank-full.csv', delimiter=';')

features = ['age', 'job', 'marital', 'education', 'default', 'balance',
            'housing', 'loan', 'contact', 'campaign', 'pdays', 'poutcome']
labels = ['y']
bank_dataframe = bank_dataframe.filter(features + labels)

# Every non-numeric column has to be encoded, not just a hand-picked few:
# a single remaining string column turns the frame into an object array and
# TensorFlow fails with "Unsupported object type".
# This list includes the label column 'y'. Remove 'y' if you need to, but
# don't forget to encode it some other time, otherwise you will need another
# way to turn it into a tf.data.Dataset.
categorical_features = [
    column
    for column in bank_dataframe.columns
    if not pd.api.types.is_numeric_dtype(bank_dataframe[column])
]

bank_dataframe = pd.get_dummies(data=bank_dataframe, columns=categorical_features)

# Cast to one numeric dtype: integer columns mixed with the dummy columns
# (bool in recent pandas) would otherwise still convert to an object array.
bank_dataframe = bank_dataframe.astype('float32')

# Each dataset element is one row of the encoded table.
bank_dataset = tf.data.Dataset.from_tensor_slices(bank_dataframe.to_numpy())
不要用for循环来做独热编码(one-hot encoding)。直接使用pandas内置的get_dummies,一行代码就能完成这项工作,不需要为此使用LabelBinarizer。
如果不清楚,请向我询问详细信息。