问题描述
from sklearn.feature_extraction.text import CountVectorizer
all_features = vectorizer.fit_transform(df['text'].values.astype('U'))
vectorizer.vocabulary_
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(vectorizer,df['intent'],test_size=0.3,random_state=88)
下面是错误。
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-27-5cd659a5da4e> in <module>
----> 1 X_train,X_test,y_train,y_test = train_test_split(vectorizer,df['intent'],test_size=0.3,random_state=88)
~\Anaconda3\lib\site-packages\sklearn\model_selection\_split.py in train_test_split(*arrays,**options)
2125 raise TypeError("Invalid parameters passed: %s" % str(options))
2126
-> 2127 arrays = indexable(*arrays)
2128
2129 n_samples = _num_samples(arrays[0])
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in indexable(*iterables)
290 """
291 result = [_make_indexable(X) for X in iterables]
--> 292 check_consistent_length(*result)
293 return result
294
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in check_consistent_length(*arrays)
250 """
251
--> 252 lengths = [_num_samples(X) for X in arrays if X is not None]
253 uniques = np.unique(lengths)
254 if len(uniques) > 1:
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in <listcomp>(.0)
250 """
251
--> 252 lengths = [_num_samples(X) for X in arrays if X is not None]
253 uniques = np.unique(lengths)
254 if len(uniques) > 1:
~\Anaconda3\lib\site-packages\sklearn\utils\validation.py in _num_samples(x)
193 if hasattr(x,'shape') and x.shape is not None:
194 if len(x.shape) == 0:
--> 195 raise TypeError("Singleton array %r cannot be considered"
196 " a valid collection." % x)
197 # Check that shape is returning an integer or default to len
TypeError: Singleton array array(CountVectorizer(stop_words='english'),dtype=object) cannot be considered a valid collection.
请帮我解决这个错误。我正在按照一篇教程学习,并尝试在上述代码中查找问题,但始终没能找出错误所在。
解决方法
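将 all_features 而不是 vectorizer 传给 train_test_split。vectorizer 是 CountVectorizer 对象本身,并不是样本数据;sklearn 会把它当作只含一个元素的数组,因此抛出 "Singleton array ... cannot be considered a valid collection" 错误。fit_transform 返回的特征矩阵 all_features 才是应当被切分的数据: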
X_train,X_test,y_train,y_test = train_test_split(all_features,df['intent'],test_size=0.3,random_state=88)