'NumpyArrayIterator' 中的输入数据的秩(rank)应为 4。您传递了一个形状为 (120000, 0) 的数组

问题描述

我制作了一个图像分类系统,使用 PlantVillage 数据集检测植物叶片病害。我完成了从预处理到模型构建的整个流程,但是当我尝试运行程序时,会弹出上述错误。我已经尝试了很多办法,坦率地说,我不想弄乱 Colab 中的数据集,所以如果有人能帮我解决这个问题,我将非常感激。这是我代码的预处理部分。

#dummy data:
# Build a 100-row frame whose 'age' column is sampled from the integers
# 0..99 plus 20 None entries, so some rows may hold no value.
# NOTE(review): `pd` and `np` are not imported before this snippet in the
# visible text - confirm they are in scope where it is run.
df = pd.DataFrame({'age': np.random.choice(list(range(100)) + [None]*20,100)})

# Replace missing ages with NaN, then bin the column into 5 quantile-based
# intervals; rows without an age show up as NaN in the result.
pd.qcut(df['age'].fillna(np.nan),5)


0     (64.0,82.4]
1     (15.8,39.0]
2              NaN
3     (39.0,64.0]
4     (15.8,39.0]
          ...     
95    (15.8,39.0]
96    (15.8,39.0]
97    (39.0,64.0]
98    (64.0,82.4]
99    (82.4,98.0]
Name: age,Length: 100,dtype: category
Categories (5,interval[float64]): [(0.999,15.8] < (15.8,39.0] < (39.0,64.0] < (64.0,82.4] < (82.4,98.0]]

现在我用 Keras 构建了模型并添加了各层,到这一部分为止一切都运行正常。

import numpy as np
import pickle
import cv2
import os
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from os import listdir
from sklearn.preprocessing import LabelBinarizer
from keras.models import Sequential
from keras.layers.normalization import Batchnormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation,Flatten,Dropout,Dense
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import Adam
from keras.preprocessing import image
from keras.preprocessing.image import img_to_array
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split

from google.colab import drive
#mount Google Drive at /content/drive; the dataset directory configured below lives under this mount point
drive.mount('/content/drive')

#dataset directory
directory_root = '/content/drive/MyDrive/proj/PlantVillage'

#dimensions of the images fed to the network: 256 x 256 with a depth of 3
width, height, depth = 256, 256, 3

#size every image is resized to so that it matches the input shape of the first layer
default_image_size = (256, 256)

#number of loaded images; stays 0 until the dataset has been read
image_size = 0

def convert_image_to_array(image_dir):
    """Load one image file and return it as a fixed-size array.

    Args:
        image_dir: Path of the image file to read.

    Returns:
        The image resized to ``default_image_size`` and converted with
        ``img_to_array``, or an empty NumPy array when ``cv2.imread``
        returned ``None`` for the given path.
    """
    loaded = cv2.imread(image_dir)

    #nothing could be read from this path: signal it with an empty array
    if loaded is None:
        return np.array([])

    #resize to exactly default_image_size; this is a fixed target size, so the
    #aspect ratio of the source image is not necessarily kept
    resized = cv2.resize(loaded, default_image_size)
    return img_to_array(resized)

#image arrays and, at the same index, the disease-folder name used as the label
image_list,label_list = [],[]

print("[INFO] Loading Images...")
root_dir = listdir(directory_root)

for plant_folder in root_dir:
    plant_folder_path = f"{directory_root}/{plant_folder}"
    #skip stray files that sit next to the plant folders; listdir would fail on them
    if not os.path.isdir(plant_folder_path):
        continue
    plant_disease_folderlist = listdir(plant_folder_path)

    for plant_disease_folder in plant_disease_folderlist:
        disease_folder_path = f"{plant_folder_path}/{plant_disease_folder}"
        if not os.path.isdir(disease_folder_path):
            continue

        print(f"[INFO] Processing {plant_disease_folder} ...")
        #list the files of THIS disease folder; listing the parent plant folder
        #instead yields folder names, so no valid image path is ever built
        plant_disease_image_list = listdir(disease_folder_path)

        #at most the first 200 entries of each disease folder are considered
        for image_name in plant_disease_image_list[:200]:
            if not image_name.endswith((".jpg", ".JPG")):
                continue

            image_directory = f"{disease_folder_path}/{image_name}"
            image_array = convert_image_to_array(image_directory)

            #convert_image_to_array returns an empty array for a file it could not
            #read; keeping it would stop the images from stacking into a rank-4
            #array, which is what NumpyArrayIterator requires
            if image_array.size == 0:
                print(f"[WARN] Skipping unreadable image {image_directory}")
                continue

            image_list.append(image_array)
            label_list.append(plant_disease_folder)

print("[INFO] Image Loading Complete!")

#stack the loaded images into one float16 array, then scale the values by 1/255
np_image_list = np.array(image_list, dtype=np.float16)
np_image_list = np_image_list / 255.0

#report how many images were loaded for training
image_size = len(image_list)
print(f"Total number of images: {image_size}")

#convert the folder-name labels into their binarized encoding for training
label_binarizer = LabelBinarizer()
image_labels = label_binarizer.fit_transform(label_list)

#dump the fitted binarizer to a pkl file so it can be reused for predictions;
#the with-block closes the file even when pickling raises
with open('plantlabel.pkl','wb') as label_file:
    pickle.dump(label_binarizer,label_file)
n_classes = len(label_binarizer.classes_)

print("Total number of classes: ",n_classes)
print("Labels: ",label_binarizer.classes_)

#hold out 20% of the samples for testing; random_state pins the shuffle
print("[INFO] Splitting Data Into Training and Testing Set...")
x_train, x_test, y_train, y_test = train_test_split(
    np_image_list,
    image_labels,
    test_size=0.2,
    random_state=42,
)

#augmentation settings used to produce varied versions of the training images;
#only the generator object is created here, it is consumed later during training
aug = ImageDataGenerator(
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

#training hyper-parameters and input geometry
EPOCHS = 10
LR = 1e-3
BATCH_SIZE = 32
WIDTH = 256
HEIGHT = 256
DEPTH = 3

#the Keras layer class is spelled BatchNormalization; it is imported here so
#this section does not depend on a misspelled name
from keras.layers import BatchNormalization

#creating the model
inputShape = (HEIGHT,WIDTH,DEPTH)
chanDim = -1
if K.image_data_format() == "channels_first":
    inputShape = (DEPTH,HEIGHT,WIDTH)
    #with channels first, the channel axis is axis 1 and no longer the last one
    chanDim = 1

model = Sequential()

#block 1: 32 filters
model.add(Conv2D(32,(3,3),padding = "same",input_shape = inputShape))
model.add(Activation("relu"))
model.add(BatchNormalization(axis = chanDim))
model.add(MaxPooling2D(pool_size = (3,3)))
model.add(Dropout(0.25))

#block 2: 64 filters
#NOTE(review): the posted line had no kernel size; (3,3) is assumed to match block 1 - confirm
model.add(Conv2D(64,(3,3),padding = "same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis = chanDim))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(0.25))

#block 3: 128 filters
#NOTE(review): the posted text was truncated here ("Conv2D(128,2)))"); this block
#is reconstructed to mirror block 2 (3x3 "same" convolution, 2x2 pooling) - confirm
model.add(Conv2D(128,(3,3),padding = "same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis = chanDim))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(0.25))

#classifier head
model.add(Flatten())
model.add(Dense(64))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(n_classes))
model.add(Activation("softmax"))

model.summary()

opt = Adam(lr = LR,decay = LR/EPOCHS)
#NOTE(review): the loss is kept as posted; a softmax output over one-hot labels is
#usually paired with "categorical_crossentropy" - confirm before relying on the metrics
model.compile(loss="binary_crossentropy",optimizer=opt,metrics=["accuracy"])

print("[INFO] Training Begins...")
#flow() must receive the labels together with the images, otherwise the
#generator yields image batches only and there is nothing to train against
history = model.fit_generator(
    aug.flow(x_train,y_train,batch_size=BATCH_SIZE),
    validation_data=(x_test,y_test),
    steps_per_epoch=len(x_train) // BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1
)
print("[INFO] Training Complete...")

#original post (translated): the error occurs in the section above; the error is as follows.

aug.flow(x_train,batch_size=BATCH_SIZE,...)

我只用了 1500 张图像进行训练,因为我的项目的目的只是构建一个模型。我只需要完成训练。我希望有人能帮我解决这个问题。谢谢。

解决方法

暂无找到可以解决该程序问题的有效方法,小编努力寻找整理中!

如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@)