问题描述
我正在做一个自己的沙盒项目,想尝试实现一个 NLP 模型,但输出是回归值而不是类别。作为参考,我使用的数据集是 Kaggle 的 wine-reviews,其中包含葡萄酒评论以及对应的 1 到 100 的分数,因此我使用线性回归而不是分类。
但我收到一条错误消息,我不确定它是数据类型问题还是维度问题导致的,也不清楚出错的原因或如何解决它。
我将在下面提供代码,以及一些显示部分对象形状的中间结果,因为我认为这可能有助于解决这个问题。
# Load the wine-reviews CSV; 'points' is used as the regression target and
# 'description' (the review text) as the model input.
df = pd.read_csv('winemag-data_first150k.csv',encoding='ISO-8859-1')
y = df['points'].astype(int)
# NOTE: X holds raw strings here; the numeric form is built further down.
X = df['description'].astype(str)
# split up the data
# test_size=0.33 holds out roughly one third of the rows for testing.
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.33)
# Upper bound on the vocabulary size handed to the tokenizer.
MAX_VOCAB_SIZE = 35000
tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
# The vocabulary is fitted on the training texts only.
tokenizer.fit_on_texts(X_train)
# Convert both splits to sequences using that same fitted tokenizer.
sequences_train = tokenizer.texts_to_sequences(X_train)
sequences_test = tokenizer.texts_to_sequences(X_test)
# word -> index mapping learned by the tokenizer; V is its number of entries.
word2idx = tokenizer.word_index
V = len(word2idx)
print('Found %s unique tokens.' % V)
Found 33012 unique tokens.
# Turn the training sequences into a single 2-D array (no maxlen given, so the
# resulting width is determined by pad_sequences itself).
data_train = pad_sequences(sequences_train)
print('Shape of data train tensor:',data_train.shape)
# get sequence length
# T is the second dimension of the padded training array; it is reused below
# for the test padding and for the model's input shape.
T = data_train.shape[1]
Shape of data train tensor: (101123,136)
# Use maxlen=T so the test array has the same second dimension as data_train.
data_test = pad_sequences(sequences_test,maxlen=T)
print('Shape of data test tensor:',data_test.shape)
Shape of data test tensor: (49807,136)
# Create the model
# We get to choose embedding dimensionality
D = 20
# Hidden state dimensionality
M = 15
# The model input is one length-T sequence per sample.
i = Input(shape=(T,))
# Embedding table has V + 1 rows and D columns.
# NOTE(review): the +1 presumably reserves index 0 for padding because the
# tokenizer's word indices start at 1 - confirm.
x = Embedding(V + 1,D)(i)
# return_sequences=True keeps the LSTM output for every time step so that the
# pooling layer below can reduce over them.
x = LSTM(M,return_sequences=True)(x)
x = GlobalMaxPooling1D()(x)
# Single output unit with no activation specified: one predicted score.
x = Dense(1)(x)
model = Model(i,x)
# Mean squared error loss, consistent with treating the score as regression.
model.compile(optimizer='adam',loss='mse')
# learning rate scheduler
def schedule(epoch, lr):
    """Return the learning rate to use for the given epoch.

    Step schedule: 0.001 for epochs 0-49, then 0.0001 from epoch 50 onwards.

    Args:
        epoch: Zero-based epoch index supplied by the caller.
        lr: Second argument supplied by the caller; it is ignored because
            the returned rate depends only on ``epoch``.

    Returns:
        The learning rate as a float.
    """
    if epoch >= 50:
        return 0.0001
    return 0.001
# Wrap schedule() in a Keras callback so it can be passed to fit().
scheduler = tf.keras.callbacks.LearningRateScheduler(schedule)
# Train the model
# NOTE(review): X is the raw 'description' column cast to str, not the padded
# integer array data_train, and no target (y_train) is passed. This is the
# call that the traceback below points at.
r = model.fit(X,epochs=200,callbacks=[scheduler])
然后我收到错误消息以及关于维度的警告:
Epoch 1/200
WARNING:tensorflow:Model was constructed with shape (None,136) for input Tensor("input_10:0",shape=(None,136),dtype=float32),but it was called on an input with incompatible shape (None,1).
WARNING:tensorflow:Model was constructed with shape (None,1).
---------------------------------------------------------------------------
UnimplementedError Traceback (most recent call last)
<ipython-input-121-0f68916ec23b> in <module>
13
14 # Train the model
---> 15 r = model.fit(X,callbacks=[scheduler])
~\anaconda3\envs\newenvt\lib\site-packages\tensorflow\python\keras\engine\training.py in _method_wrapper(self,*args,**kwargs)
106 def _method_wrapper(self,**kwargs):
107 if not self._in_multi_worker_mode(): # pylint: disable=protected-access
--> 108 return method(self,**kwargs)
109
110 # Running inside `run_distribute_coordinator` already.
~\anaconda3\envs\newenvt\lib\site-packages\tensorflow\python\keras\engine\training.py in fit(self,x,batch_size,epochs,verbose,callbacks,validation_split,validation_data,shuffle,class_weight,sample_weight,initial_epoch,steps_per_epoch,validation_steps,validation_batch_size,validation_freq,max_queue_size,workers,use_multiprocessing)
1096 batch_size=batch_size):
1097 callbacks.on_train_batch_begin(step)
-> 1098 tmp_logs = train_function(iterator)
1099 if data_handler.should_sync:
1100 context.async_wait()
~\anaconda3\envs\newenvt\lib\site-packages\tensorflow\python\eager\def_function.py in __call__(self,**kwds)
778 else:
779 compiler = "nonXla"
--> 780 result = self._call(*args,**kwds)
781
782 new_tracing_count = self._get_tracing_count()
~\anaconda3\envs\newenvt\lib\site-packages\tensorflow\python\eager\def_function.py in _call(self,**kwds)
838 # Lifting succeeded,so variables are initialized and we can run the
839 # stateless function.
--> 840 return self._stateless_fn(*args,**kwds)
841 else:
842 canon_args,canon_kwds = \
~\anaconda3\envs\newenvt\lib\site-packages\tensorflow\python\eager\function.py in __call__(self,**kwargs)
2827 with self._lock:
2828 graph_function,args,kwargs = self._maybe_define_function(args,kwargs)
-> 2829 return graph_function._filtered_call(args,kwargs) # pylint: disable=protected-access
2830
2831 @property
~\anaconda3\envs\newenvt\lib\site-packages\tensorflow\python\eager\function.py in _filtered_call(self,kwargs,cancellation_manager)
1841 `args` and `kwargs`.
1842 """
-> 1843 return self._call_flat(
1844 [t for t in nest.flatten((args,kwargs),expand_composites=True)
1845 if isinstance(t,(ops.Tensor,~\anaconda3\envs\newenvt\lib\site-packages\tensorflow\python\eager\function.py in _call_flat(self,captured_inputs,cancellation_manager)
1921 and executing_eagerly):
1922 # No tape is watching; skip to running the function.
-> 1923 return self._build_call_outputs(self._inference_function.call(
1924 ctx,cancellation_manager=cancellation_manager))
1925 forward_backward = self._select_forward_and_backward_functions(
~\anaconda3\envs\newenvt\lib\site-packages\tensorflow\python\eager\function.py in call(self,ctx,cancellation_manager)
543 with _InterpolateFunctionError(self):
544 if cancellation_manager is None:
--> 545 outputs = execute.execute(
546 str(self.signature.name),547 num_outputs=self._num_outputs,~\anaconda3\envs\newenvt\lib\site-packages\tensorflow\python\eager\execute.py in quick_execute(op_name,num_outputs,inputs,attrs,name)
57 try:
58 ctx.ensure_initialized()
---> 59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle,device_name,op_name,60 inputs,num_outputs)
61 except core._NotOkStatusException as e:
UnimplementedError: Cast string to float is not supported
[[node functional_11/Cast (defined at <ipython-input-121-0f68916ec23b>:15) ]] [Op:__inference_train_function_17206]
Function call stack:
train_function
我不完全确定需要更改什么,但任何建议将不胜感激。
解决方法
来自评论
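你把 X 传给了 model.fit,而 X 实际上是字符串值;神经网络不能直接接收字符串作为输入(转述自史努比博士)。应改为传入填充后的整数序列 data_train 以及对应的目标 y_train,例如 model.fit(data_train,y_train,epochs=200,callbacks=[scheduler])。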