Keras 的 predict 与 predict_generator 的预测结果不匹配

时间:2019-05-04 16:50:25

标签: keras

我正在使用 InceptionResNetV2 网络进行迁移学习,数据是医学图像集。我在训练后保存了模型,然后将其用于测试数据,该测试数据分为 2 个类别。问题在于,当我分别使用 Keras 的 predict 和 predict_generator 进行预测时,得到的准确率不同,甚至连预测概率都不一致。

这是我的代码段的一部分:

from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import class_weight
from keras.preprocessing.image import ImageDataGenerator, image,load_img
from keras import backend as K
from keras import models
from keras import layers
from keras import optimizers
from keras.models import load_model
from keras.models import model_from_json
# set image dimension for Conv layer etc based on tensor flow or theano
K.set_image_dim_ordering('tf')
from keras.applications import InceptionResNetV2
from keras.applications.inception_resnet_v2 import preprocess_input
from keras.callbacks import ModelCheckpoint
import os
import sys
import glob
import argparse

# Image size handed to every loader/generator below as target_size=(W, H).
W = 299
H = 299
nc = 3  # presumably the number of colour channels -- not used in this excerpt
# Number of output classes; sizes the per-class probability arrays.
nclass = 2
# Truthy: restore the saved architecture and weights. Falsy: build the model.
load_mod = 1
if load_mod:
    #model = load_model('diabetic_v9.h5')
    # Rebuild the architecture from its JSON description, then attach the
    # trained weights. The context manager closes the file even if read() fails.
    with open("diabetic_v9.json", 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    model.load_weights("diabetic_v9_weights.h5")
    model.summary()
else:
    def Resnet_finetune_model():
        """Build the fine-tuning model when no saved model is loaded.

        The construction code was omitted from this excerpt; the stub raises
        so the omission is reported explicitly instead of breaking the parse.
        """
        #IF Not LOADING ..MODEL is BUILD here
        raise NotImplementedError(
            'model construction code is not included in this excerpt')

# Dataset root and its train / valid / test sub-directories.
bs_pth = './no_resize'
train_dir = '{}/train'.format(bs_pth)
validation_dir = '{}/valid'.format(bs_pth)
test_dir = '{}/test'.format(bs_pth)

batch_size = 8

# Training images are augmented; validation images only get preprocess_input.
datagen_tr = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    channel_shift_range=10,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen_vd = ImageDataGenerator(preprocessing_function=preprocess_input)

# Both generators resize to (W, H) with bilinear interpolation; only the
# training generator shuffles.
train_gen = datagen_tr.flow_from_directory(
    train_dir,
    target_size=(W, H),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
    interpolation="bilinear",
)
vd_gen = datagen_vd.flow_from_directory(
    validation_dir,
    target_size=(W, H),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
    interpolation="bilinear",
)

# Sample counts, and the number of whole batches per epoch (any partial
# final batch is not counted).
nTrain = np.size(train_gen.classes)
nVal = np.size(vd_gen.classes)
epochs = 40
steps_per_epoch_tr = nTrain // batch_size
steps_per_epoch_val = nVal // batch_size

def get_files(path):
    """Return the PNG files designated by *path*, sorted by name.

    *path* may be a directory (its direct entries are listed), a glob
    pattern containing ``*`` anywhere in the string, or a single file path.
    Only names ending in ``png`` are kept.

    The result is sorted because ``glob`` returns entries in arbitrary
    order; a fixed order makes per-file outputs comparable between runs.

    Exits the program via ``sys.exit`` when no matching file is found.
    """
    if os.path.isdir(path):
        candidates = glob.glob(os.path.join(path, '*'))
    elif '*' in path:
        # `in` also catches a wildcard at index 0 (e.g. '*.png'), which the
        # former `path.find('*') > 0` test treated as a literal file name.
        candidates = glob.glob(path)
    else:
        candidates = [path]

    files = sorted(f for f in candidates if f.endswith('png'))
    if not files:
        sys.exit('No images found by the given path!')
    return files
def _predict_class_dir(class_dir, true_index, class_names, out_path):
    """Run model.predict on every image in *class_dir*, one file at a time.

    Prints each file name with its class probabilities, saves the
    probability matrix to *out_path*, prints the accuracy for the class and
    returns the matrix (one row per file, ``nclass`` columns).

    class_dir   -- directory holding the images of a single true class
    true_index  -- output index that counts as a correct prediction
    class_names -- display names indexed by output position
    out_path    -- text file receiving the probabilities via np.savetxt
    """
    files = get_files(class_dir)
    rows = []
    n_correct = 0
    for f in files:
        # Resize with the same interpolation as the training/validation
        # generators in this file ("bilinear"). Without the argument,
        # load_img uses its own default resampling, so this path and the
        # generator path feed the network different pixels and their
        # probabilities cannot be expected to agree.
        img = image.load_img(f, target_size=(W, H), interpolation='bilinear')
        x = image.img_to_array(img)
        x = preprocess_input(x)
        x = np.expand_dims(x, axis=0)
        pred = model.predict(x)[0]  # predict returns [[p0, p1]]; take the row
        rows.append(pred)
        if np.argmax(pred) == true_index:
            n_correct += 1
        print(f)
        # Highest probability first (at most five entries).
        for i in pred.argsort()[::-1][:5]:
            print('    {:.3f}  {}'.format(pred[i], class_names[i]))
    # Build the matrix once instead of re-allocating with np.append per file.
    preds = np.array(rows, dtype=np.float64).reshape(-1, nclass)
    np.savetxt(out_path, preds)
    # float() keeps this a true division on Python 2 as well.
    accuracy = (n_correct / float(len(files))) * 100
    print("calss {} accuracy  = ".format(true_index) + str(accuracy) + '%')
    return preds


print("***********Test data :predict class 0*************************************")
cls_list = ['a','b']
print(cls_list)
# 2-D array: probability of each class for each file of true class 0.
pred_c0 = _predict_class_dir(
    test_dir + '/0', 0, cls_list,
    './diabeticRetino/diabetic_v9_predict_c0.txt')

print("***********Test data :predict class 1*************************************")

# Same as above for the files of true class 1.
pred_c1 = _predict_class_dir(
    test_dir + '/1', 1, cls_list,
    './diabeticRetino/diabetic_v9_predict_c1.txt')


# Evaluate the whole test directory through a generator. Interpolation is
# "bilinear" to match the training and validation generators in this file;
# the former "bicubic" resized test images differently from how the network
# was trained and validated.
test_gen = datagen_vd.flow_from_directory(
    test_dir,
    target_size=(W, H),
    batch_size=1,
    class_mode='categorical',
    shuffle=False,  # keep predictions aligned with test_gen.classes
    interpolation="bilinear",
)
# predict on test data, and save differences
filenames = test_gen.filenames
nTest = len(filenames)
# batch_size is 1, so nTest steps cover every file exactly once.
tst_pred = model.predict_generator(test_gen, steps=nTest)
test_pred = np.argmax(tst_pred, axis=1)
tst_lbls = test_gen.classes

# Plot true and predicted class index per test sample.
plt.plot(range(nTest), tst_lbls, 'b-', label='True Class Labels:Test')
plt.plot(range(nTest), test_pred, 'g-', label='Predicted Class Lables"Test')
plt.title('Test Prediction')
plt.legend(loc='center right')
plt.savefig('./diabeticRetino/diabetic_v9_predict.png')

# Two columns per sample: true label, predicted label.
tst_stat = np.vstack((tst_lbls, test_pred)).T
np.savetxt('./diabeticRetino/diabetic_v9_predict.txt', tst_stat)
dif = tst_lbls - test_pred
# count number of zeros (where true class matches predicted class);
# float() keeps this a true division on Python 2 as well.
print('test_acc_again:' + str(np.count_nonzero(dif == 0) / float(nTest)))

现在,pred_c0(使用 Keras 的 predict 对类别 0 得到的预测)和 pred_c1(使用 Keras 的 predict 对类别 1 得到的预测)应当与 tst_pred(使用 predict_generator 对两个类别得到的预测)一致。但是,如屏幕截图所示,它们并不吻合:使用 tst_pred 时,两个类别的测试准确率均为 100%;而使用 pred_c0 时类别 0 的准确率为 93%,使用 pred_c1 时类别 1 的准确率为 100%。

enter image description here

enter image description here

enter image description here

我不知道为什么会这样。 请帮忙。
赛迪

0 个答案:

没有答案