问题描述
我正在尝试做一个小型作业项目,在这里我试图使用暹罗网络进行说话人识别。我正在尝试使用talos查找可能的最佳超参数。只用单独一组超参数运行时,代码运行正常;但是当我向talos提供整个超参数网格(多组候选超参数)时,我得到了MemoryError。
背景(不确定是否需要,但出于此原因,请允许我解释一下)
- 将音频分别分成5秒的块
- 计算每个音频样本的MFCC和MFCC_del和MFCC_del_del并将其全部合并,以便将每个音频样本转换为(24x939)数据点/矩阵
- 通过具有两组矩阵来创建暹罗网络:一组是所有“相似”(同一说话人)配对的音频样本,而另一组是“不相似”(不同说话人)配对的样本。
- 总共6000个测试点和14886个训练点(现在不使用验证将在以后使用)
def yield_arrays_train(array_x_train_feat1, array_x_train_feat2, array_y_train, batch_size):
    """Infinite batch generator for Keras ``fit_generator`` (training split).

    Yields ``([feat1_batch, feat2_batch], label_batch)`` tuples of float32
    arrays, cycling over the data forever.

    Bug fixed: the original loop was ``for i in range(14886)``, i.e. the
    slice window advanced by 1 sample instead of ``batch_size``, so
    consecutive "batches" overlapped by batch_size-1 samples and each epoch
    re-visited almost every sample batch_size times.  The window now steps
    by ``batch_size`` and the length is derived from the data rather than
    hard-coded.

    Parameters
    ----------
    array_x_train_feat1, array_x_train_feat2 : np.ndarray
        Paired feature arrays, indexed along axis 0 by sample.
    array_y_train : np.ndarray
        Labels, same length along axis 0.
    batch_size : int
        Number of samples per yielded batch (last batch may be shorter).
    """
    n_samples = len(array_y_train)
    while True:
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            x1 = array_x_train_feat1[start:stop].astype(np.float32)
            x2 = array_x_train_feat2[start:stop].astype(np.float32)
            y = array_y_train[start:stop].astype(np.float32)
            yield ([x1, x2], y)
def yield_arrays_val(array_x_test_feat1, array_x_test_feat2, array_y_test, batch_size):
    """Infinite batch generator for Keras ``fit_generator`` (validation split).

    Yields ``([feat1_batch, feat2_batch], label_batch)`` tuples of float32
    arrays, cycling over the data forever.

    Bug fixed: the original loop was ``for i in range(60)`` — a hard-coded
    count that both ignored the real data length (6000 test points per the
    description) and advanced the slice window by 1 sample instead of
    ``batch_size``, producing heavily overlapping batches.  The window now
    steps by ``batch_size`` over the whole array.

    Parameters
    ----------
    array_x_test_feat1, array_x_test_feat2 : np.ndarray
        Paired feature arrays, indexed along axis 0 by sample.
    array_y_test : np.ndarray
        Labels, same length along axis 0.
    batch_size : int
        Number of samples per yielded batch (last batch may be shorter).
    """
    n_samples = len(array_y_test)
    while True:
        for start in range(0, n_samples, batch_size):
            stop = start + batch_size
            x1 = array_x_test_feat1[start:stop].astype(np.float32)
            x2 = array_x_test_feat2[start:stop].astype(np.float32)
            y = array_y_test[start:stop].astype(np.float32)
            yield ([x1, x2], y)
# Instantiate the infinite batch generators (batch size 6) over the
# pre-computed MFCC feature arrays.  The x/y arrays are defined earlier
# in the script (not shown in this excerpt).
train_generator=yield_arrays_train(xtrain_np_img1,xtrain_np_img2,y_train_numpy,6)
val_generator=yield_arrays_val(xtest_np_img1,xtest_np_img2,y_test_numpy,6)
def siamese(generator, validation_data):
    """Build and train the siamese MFCC speaker-verification model.

    Two (24, 939) MFCC inputs are passed through one shared Conv1D
    encoder; a Lambda layer computes the Euclidean distance between the
    two embeddings, trained with ``contrastive_loss``.

    Fixes vs. the original:
    - ``fit_generator`` now uses the ``generator`` / ``validation_data``
      parameters instead of silently reading the module-level globals
      ``train_generator`` / ``val_generator``.
    - the third ``Conv1D`` omitted the mandatory ``kernel_size`` argument
      (likely lost in copy/paste), which raises a TypeError.

    Parameters
    ----------
    generator : generator
        Training batch generator yielding ``([x1, x2], y)``.
    validation_data : generator
        Validation batch generator of the same shape.

    Returns
    -------
    (History, Model)
        The Keras training history and the trained model.
    """
    W_init = tf.keras.initializers.he_normal(seed=100)
    b_init = tf.keras.initializers.he_normal(seed=50)
    input_shape = (24, 939)  # 24 coefficients (MFCC + deltas) x 939 frames per 5s chunk
    left_input = Input(input_shape)
    right_input = Input(input_shape)

    # Shared encoder: both inputs are mapped through the SAME weights.
    encoder = Sequential()
    encoder.add(Conv1D(filters=8, kernel_size=6, padding='same', activation='relu',
                       input_shape=input_shape,
                       kernel_initializer=W_init, bias_initializer=b_init))
    encoder.add(BatchNormalization())
    encoder.add(Dropout(.1))
    encoder.add(MaxPool1D())
    encoder.add(Conv1D(filters=6, kernel_size=4, activation='relu'))
    encoder.add(BatchNormalization())
    encoder.add(Dropout(.1))
    encoder.add(MaxPool1D())
    # kernel_size is a required Conv1D argument; the original line omitted
    # it.  3 is a guess consistent with the shrinking kernels above —
    # TODO confirm against the original working source.
    encoder.add(Conv1D(filters=4, kernel_size=3, activation='relu'))
    encoder.add(BatchNormalization())
    encoder.add(Dropout(.1))
    encoder.add(MaxPool1D())
    encoder.add(Flatten())
    encoder.add(Dense(10, activation='relu'))
    encoder.add(Dropout(.1))

    encoded_l = encoder(left_input)
    encoded_r = encoder(right_input)
    distance = Lambda(euclidean_distance,
                      output_shape=eucl_dist_output_shape)([encoded_l, encoded_r])

    # NOTE(review): lr=0.1 with beta_1=0.1 is far from Adam defaults
    # (1e-3 / 0.9), and the pasted training log shows loss going to NaN
    # after epoch 1 — a much smaller learning rate is the likely fix.
    adam = optimizers.Adam(lr=.1, beta_1=0.1, beta_2=0.999, decay=.1, amsgrad=False)
    earlyStopping = EarlyStopping(monitor='loss', min_delta=0, patience=3,
                                  verbose=1, restore_best_weights=False)
    callback_early_stop_reduceLROnPlateau = [earlyStopping]

    model = Model([left_input, right_input], distance)
    model.compile(loss=contrastive_loss, optimizer=adam, metrics=[accuracy])
    model.summary()

    # Bug fix: train on the arguments, not the module-level generators.
    history = model.fit_generator(generator=generator,
                                  validation_data=validation_data,
                                  steps_per_epoch=2481,
                                  epochs=5,
                                  validation_steps=1000,
                                  callbacks=callback_early_stop_reduceLROnPlateau,
                                  use_multiprocessing=False,
                                  workers=0)
    return history, model
# Run one training experiment with the hand-picked hyper-parameters.
siamese(train_generator,val_generator)
输出:
Model: "model_6"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_11 (InputLayer) (None,24,939) 0
__________________________________________________________________________________________________
input_12 (InputLayer) (None,939) 0
__________________________________________________________________________________________________
sequential_6 (Sequential) (None,10) 45580 input_11[0][0]
input_12[0][0]
__________________________________________________________________________________________________
lambda_6 (Lambda) (None,1) 0 sequential_6[1][0]
sequential_6[2][0]
==================================================================================================
Total params: 45,580
Trainable params: 45,544
Non-trainable params: 36
__________________________________________________________________________________________________
Epoch 1/5
2481/2481 [==============================] - 29s 12ms/step - loss: 0.0056 - accuracy: 0.9986 - val_loss: 0.8333 - val_accuracy: 0.1667
Epoch 2/5
2481/2481 [==============================] - 28s 11ms/step - loss: nan - accuracy: 0.9993 - val_loss: nan - val_accuracy: 0.8333
Epoch 3/5
2481/2481 [==============================] - 28s 11ms/step - loss: nan - accuracy: 1.0000 - val_loss: nan - val_accuracy: 0.8333
Epoch 4/5
2481/2481 [==============================] - 28s 11ms/step - loss: nan - accuracy: 1.0000 - val_loss: nan - val_accuracy: 0.8333
Epoch 00004: early stopping
(<keras.callbacks.callbacks.History at 0x2969fa3fd88>,<keras.engine.training.Model at 0x2969f2a17c8>)
因此没有任何Talos或它正在处理一组随机超参数。现在我第一次使用Talos
# Talos hyper-parameter grid.
# NOTE(review): the pasted original was truncation-garbled (unclosed
# lists, keys fused into neighbouring values); the list contents below
# are reconstructed only so the dict parses — confirm the intended
# values against the real source.
# NOTE(review): the MemoryError in the traceback comes from Talos
# materialising the FULL cartesian product of this grid
# (ParamSpace._param_space_creation -> list(it.product(...))).  Keep the
# grid small, or pass Scan's fraction_limit / round_limit / time_limit
# so the space is sampled instead of fully enumerated.
p = {
    'filter1': [1, 2, 4, 6, 8, 12, 16, 24],
    'kernel_size1': [2, 12],
    'filter2': [1],
    'kernel_size2': [2],
    'filter3': [1, 8],
    'kernel_size3': [1],
    'droprate1': [.1, .2, .3],
    'droprate2': [.1],
    'droprate3': [.1],
    'droprate4': [.1],
    'unit1': [10, 36, 64, 128, 256],
    'lr': [(.1, .00001)],
    'decay': [.1, 0.01, .001, .0001, .00001],
    'batch_size': [1, 2],
    'epochs': [4, 10],
}
def siamese(generator, validation_data, params):
    """Talos-parameterised variant of the siamese model builder.

    NOTE(review): the tail of the pasted original (Lambda / optimizer /
    compile / fit) was truncation-garbled; everything after the encoder is
    reconstructed from the non-Talos version of this function — confirm
    against the real source.

    NOTE(review): Talos invokes the model function as
    ``model(x_train, y_train, x_val, y_val, params)`` — five positional
    arguments — so this three-argument signature cannot work with
    ``ta.Scan`` as written.  (The MemoryError in the traceback happens
    earlier, while Talos expands the parameter grid, before this function
    is ever called.)

    Parameters
    ----------
    generator, validation_data : generator
        Batch generators yielding ``([x1, x2], y)``.
    params : dict
        One Talos round's hyper-parameter assignment (keys as in ``p``).

    Returns
    -------
    (History, Model)
        The pair Talos expects from a model function.
    """
    W_init = tf.keras.initializers.he_normal(seed=100)
    b_init = tf.keras.initializers.he_normal(seed=50)
    input_shape = (24, 939)
    left_input = Input(input_shape)
    right_input = Input(input_shape)

    # Shared encoder, sized by the current hyper-parameter round.
    # NOTE(review): the first Conv1D here has no activation/padding/
    # input_shape, unlike the non-Talos version — possibly also lost in
    # the paste; verify.
    encoder = Sequential()
    encoder.add(Conv1D(filters=params['filter1'],
                       kernel_size=params['kernel_size1'],
                       bias_initializer=b_init))
    encoder.add(BatchNormalization())
    encoder.add(Dropout(params['droprate1']))
    encoder.add(MaxPool1D())
    encoder.add(Conv1D(filters=params['filter2'],
                       kernel_size=params['kernel_size2'],
                       activation='relu'))
    encoder.add(BatchNormalization())
    encoder.add(Dropout(params['droprate2']))
    encoder.add(MaxPool1D())
    encoder.add(Conv1D(filters=params['filter3'],
                       kernel_size=params['kernel_size3'],
                       activation='relu'))
    encoder.add(BatchNormalization())
    encoder.add(Dropout(params['droprate3']))
    encoder.add(MaxPool1D())
    encoder.add(Flatten())
    encoder.add(Dense(params['unit1'], activation='relu'))
    encoder.add(Dropout(params['droprate4']))

    encoded_l = encoder(left_input)
    encoded_r = encoder(right_input)
    distance = Lambda(euclidean_distance,
                      output_shape=eucl_dist_output_shape)([encoded_l, encoded_r])

    adam = optimizers.Adam(lr=params['lr'], decay=params['decay'])
    earlyStopping = EarlyStopping(monitor='loss', min_delta=0, patience=3,
                                  verbose=1, restore_best_weights=False)
    callback_early_stop_reduceLROnPlateau = [earlyStopping]

    model = Model([left_input, right_input], distance)
    model.compile(loss=contrastive_loss, optimizer=adam, metrics=[accuracy])

    history = model.fit_generator(generator=generator,
                                  validation_data=validation_data,
                                  steps_per_epoch=2481,
                                  epochs=params['epochs'],
                                  validation_steps=1000,
                                  callbacks=callback_early_stop_reduceLROnPlateau,
                                  use_multiprocessing=False,
                                  workers=0)
    return history, model
# Talos grid search.  NOTE(review): Scan builds the full cartesian
# product of `p` up front, which is what raises the MemoryError for a
# large grid — shrink the grid or pass fraction_limit / round_limit to
# sample the space instead of enumerating it.
t=ta.Scan(x=[xtrain_np_img1,xtrain_np_img2],y=y_train_numpy,x_val=[xtest_np_img1,xtest_np_img2],y_val=y_test_numpy,model=siamese,params=p,experiment_name='exp_1')
我得到的错误是:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
<ipython-input-9-df856388a4bb> in <module>
1 #t=ta.Scan(x=xtrain_np_img1_img2,x_val=xtest_np_img1_img2,experiment_name='exp_1')
2
----> 3 t=ta.Scan(x=[xtrain_np_img1,experiment_name='exp_1')
~\anaconda3\envs\MyEnv\lib\site-packages\talos\scan\Scan.py in __init__(self,x,y,params,model,experiment_name,x_val,y_val,val_split,random_method,seed,performance_target,fraction_limit,round_limit,time_limit,boolean_limit,reduction_method,reduction_interval,reduction_window,reduction_threshold,reduction_metric,minimize_loss,disable_progress_bar,print_params,clear_session,save_weights)
194 # start runtime
195 from .scan_run import scan_run
--> 196 scan_run(self)
~\anaconda3\envs\MyEnv\lib\site-packages\talos\scan\scan_run.py in scan_run(self)
7
8 from .scan_prepare import scan_prepare
----> 9 self = scan_prepare(self)
10
11 # initiate the progress bar
~\anaconda3\envs\MyEnv\lib\site-packages\talos\scan\scan_prepare.py in scan_prepare(self)
28 round_limit=self.round_limit,29 time_limit=self.time_limit,---> 30 boolean_limit=self.boolean_limit
31 )
32
~\anaconda3\envs\MyEnv\lib\site-packages\talos\parameters\ParamSpace.py in __init__(self,param_keys,boolean_limit)
42
43 # create the parameter space
---> 44 self.param_space = self._param_space_creation()
45
46 # handle the boolean limits separately
~\anaconda3\envs\MyEnv\lib\site-packages\talos\parameters\ParamSpace.py in _param_space_creation(self)
133 if len(self.param_index) > 100000:
134
--> 135 final_grid = list(it.product(*self._params_temp))
136 out = np.array(final_grid,dtype='object')
137
MemoryError:
我的问题是,因为我不熟悉python /机器学习等
- 如何正确使用fit_generator(之前我尝试使用fit函数,但由于内存错误,我的Windows会死机,我改用try fit_generator,但仍然存在相同的错误)。我认为我编写的代码不正确?
- 我的笔记本电脑规格很难运行此代码吗? 我的规格 内存:16 GB Windows 10 64位 CPU:英特尔酷睿i5(四核) GPU:它内置了GTX1050,但我没有像我之前尝试的那样配置它,但是运气不佳
解决方法
暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)