在Java中加载sklearn模型。在python中使用DNNClassifier创建的模型

时间:2017-04-24 22:56:26

标签: java tensorflow tensorflow-serving sklearn-pandas

目标是在Java中使用tensorflow.contrib.learn.learn.DNNClassifier在python中创建/训练模型。

目前主要问题是要知道在会话运行方法中用java给出的“张量”的名称。

我在python中有这个测试代码:

    from __future__ import division, print_function, absolute_import
import tensorflow as tf
import pandas as pd
import tensorflow.contrib.learn as learn
import numpy as np
from sklearn import metrics
from sklearn.cross_validation import train_test_split
from tensorflow.contrib import layers
from tensorflow.contrib.learn.python.learn.utils import input_fn_utils
from tensorflow.python.ops import array_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.util.compat import as_text

print(tf.VERSION)

df = pd.read_csv('../NNNormalizeData-out.csv')

inputs = []
target = []

y=0;    
for x in df.columns:
    if y != 35 :
        #print("added %d" %y)
        inputs.append(x)
    else :
        target.append(x)
    y+=1

total_inputs,total_output = df.as_matrix(inputs).astype(np.float32),df.as_matrix([target]).astype(np.int32)

train_inputs, test_inputs, train_output, test_output = train_test_split(total_inputs, total_output, test_size=0.2, random_state=42)

feature_columns = [tf.contrib.layers.real_valued_column("", dimension=train_inputs.shape[1],dtype=tf.float32)]
#target_column = [tf.contrib.layers.real_valued_column("output", dimension=train_output.shape[1])]

classifier = learn.DNNClassifier(hidden_units=[10, 20, 5], n_classes=5
                                 ,feature_columns=feature_columns)

classifier.fit(train_inputs, train_output, steps=100)

#Save Model into saved_model.pbtxt file (possible to Load in Java)
tfrecord_serving_input_fn = tf.contrib.learn.build_parsing_serving_input_fn(layers.create_feature_spec_for_parsing(feature_columns))  
classifier.export_savedmodel(export_dir_base="test", serving_input_fn = tfrecord_serving_input_fn,as_text=True)


# Measure accuracy
pred = list(classifier.predict(test_inputs, as_iterable=True))
score = metrics.accuracy_score(test_output, pred)
print("Final score: {}".format(score))

# test individual samples 
sample_1 = np.array( [[0.37671986791414125,0.28395908337619136,-0.0966095873607713,-1.0,0.06891621389763203,-0.09716678086712205,0.726029084013637,4.984689881073479E-4,-0.30296253267499107,-0.16192917054985334,0.04820256230479658,0.4951319883569152,0.5269983894210499,-0.2560313828048315,-0.3710980821053321,-0.4845867212612598,-0.8647234314469595,-0.6491591208322198,-1.0,-0.5004549422844073,-0.9880910165770813,0.5540293108747256,0.5625990251930839,0.7420121698556554,0.5445551415657979,0.4644276850235627,0.7316976292340245,0.636690006814346,0.16486621649984112,-0.0466018967678159,0.5261100063227044,0.6256168612312738,-0.544295484930702,0.379125782517193,0.6959368575211544]], dtype=float)
sample_2 = np.array( [[1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085]], dtype=float)

pred = list(classifier.predict(sample_2, as_iterable=True))
print("Prediction for sample_1 is:{} ".format(pred))

pred = list(classifier.predict_proba(sample_2, as_iterable=True))
print("Prediction for sample_2 is:{} ".format(pred))

创建了model_saved.pbtxt文件。

我尝试使用以下代码在Java中加载此模型:

    public class HelloTF {
    public static void main(String[] args) throws Exception {
        SavedModelBundle bundle=SavedModelBundle.load("/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave","serve");
        Session s = bundle.session();

        double[] inputDouble = {1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085};
        float [] inputfloat=new float[inputDouble.length];
        for(int i=0;i<inputfloat.length;i++)
        {
            inputfloat[i]=(float)inputDouble[i];
        }
        Tensor inputTensor = Tensor.create(new long[] {35}, FloatBuffer.wrap(inputfloat) );

        Tensor result = s.runner()
                .feed("input_example_tensor", inputTensor)
                .fetch("dnn/multi_class_head/predictions/probabilities")
                .run().get(0);


         float[] m = new float[5];
         float[] vector = result.copyTo(m);
         float maxVal = 0;
         int inc = 0;
         int predict = -1;
         for(float val : vector) 
         {
             System.out.println(val+"  ");
             if(val > maxVal) {
                 predict = inc;
                 maxVal = val;
             }
             inc++;
         }
         System.out.println(predict);



    }
} 

我在.run()上得到错误.get(0); line:

Exception in thread "main" org.tensorflow.TensorFlowException: Output 0 of type float does not match declared output type string for node _recv_input_example_tensor_0 = _Recv[_output_shapes=[[-1]], client_terminated=true, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/cpu:0", send_device_incarnation=3663984897684684554, tensor_name="input_example_tensor:0", tensor_type=DT_STRING, _device="/job:localhost/replica:0/task:0/cpu:0"]()
    at org.tensorflow.Session.run(Native Method)
    at org.tensorflow.Session.access$100(Session.java:48)
    at org.tensorflow.Session$Runner.runHelper(Session.java:285)
    at org.tensorflow.Session$Runner.run(Session.java:235)
    at tensorflow.HelloTF.main(HelloTF.java:35)

4 个答案:

答案 0 :(得分:3)

好的我终于解决了:主要的问题是在java中使用的输入的名称是&#34;&#34; dnn / input_from_feature_columns / input_from_feature_columns / concat&#34;而不是&#34; input_example_tensor&#34;。

我使用图形导航发现了这个:tensorboard --logdir = D:\ python \ Workspace \ Autoencoder \ src \ dnn \ ModelSave

这是java代码:

public class HelloTF {
public static void main(String[] args) throws Exception {
    SavedModelBundle bundle=SavedModelBundle.load("/java/workspace/APIJavaSampleCode/tfModels/dnn/ModelSave","serve");
    Session s = bundle.session();

    double[] inputDouble = {1.0,0.7982741870963959,1.0,-0.46270838239235024,0.040320274521029376,0.443451913224413,-1.0,1.0,1.0,-1.0,0.36689718911339564,-0.13577379160035796,-0.5162916256414466,-0.03373651520104648,1.0,1.0,1.0,1.0,0.786999801054777,-0.43856035121103853,-0.8199093927945158,1.0,-1.0,-1.0,-0.1134921695894473,-1.0,0.6420892436196663,0.7871737734493178,1.0,0.6501788845358409,1.0,1.0,1.0,-0.17586627413625022,0.8817194210401085};
    float [] inputfloat=new float[inputDouble.length];
    for(int i=0;i<inputfloat.length;i++)
    {
        inputfloat[i]=(float)inputDouble[i];
    }
FloatBuffer.wrap(inputfloat) );
    float[][] data= new float[1][35];
    data[0]=inputfloat;
    Tensor inputTensor=Tensor.create(data);


    Tensor result = s.runner()
            .feed("dnn/input_from_feature_columns/input_from_feature_columns/concat", inputTensor)
            //.feed("input_example_tensor", inputTensor)
            //.fetch("tensorflow/serving/classify")
            .fetch("dnn/multi_class_head/predictions/probabilities")
            //.fetch("dnn/zero_fraction_3/Cast")
            .run().get(0);


     float[][] m = new float[1][5];
     float[][] vector = result.copyTo(m);
     float maxVal = 0;
     int inc = 0;
     int predict = -1;
     for(float val : vector[0]) 
     {
         System.out.println(val+"  ");
         if(val > maxVal) {
             predict = inc;
             maxVal = val;
         }
         inc++;
     }
     System.out.println(predict);



}

}

我测试了输出:

phyton方面:

Prediction for sample_2 is:[3] 
Prediction for sample_2 is:[array([ 0.17157166,  0.24475774,  0.16158019,  0.24648622,  0.17560424], dtype=float32)] 

Java Side:

0.17157166  
0.24475774  
0.16158019   
0.24648622  
0.17560424  
3

答案 1 :(得分:1)

错误消息提供了线索:模型中名为"input_example_tensor"的张量期望具有string个内容,而您提供float个值。

根据张量的名称和你的代码判断,我猜你所喂食的张量是defined in input_fn_utils.py。这个张量传递给tf.parse_example() op,它需要一个tf.train.Example协议缓冲区的向量,序列化为字符串。

答案 2 :(得分:0)

我在Tensorflow 1.1上收到了feed("input_example_tensor", inputTensor)的错误。

但我发现example.proto可以作为“input_example_tensor”提供,虽然花了很多时间来弄清楚如何为序列化协议缓冲区创建字符串张量。

这就是我创建inputTensor的方式。

org.tensorflow.example.Example.Builder example = org.tensorflow.example.Example.newBuilder();   
/* set some features to example... */

Tensor exampleTensor = Tensor.create(example.build().toByteArray());
// Here, the shape of exampleTensor is not specified yet.

// Set the shape to feed this as "input_example_tensor"
Graph g = bundle.graph(); 
Output examplePlaceholder =
                  g.opBuilder("Placeholder", "example")
                  .setAttr("dtype", exampleTensor.dataType())                        
                      .build().output(0);
Tensor shapeTensor = Tensor.create(new long[]{1}, IntBuffer.wrap(new int[]{1}));                      
Output shapeConst = g.opBuilder("Const", "shape")
                      .setAttr("dtype", shapeTensor.dataType())
                      .setAttr("value", shapeTensor)
                      .build().output(0);
Output shaped = g.opBuilder("Reshape", "output").addInput(examplePlaceholder).addInput(shapeConst).build().output(0);


Tensor inputTensor = s.runner().feed(examplePlaceholder, exampleTensor).fetch(shaped).run().get(0);                   
// Now, inputTensor has shape of [1] and ready to feed.     

答案 3 :(得分:0)

.feed()和.fetch()中的参数应与输入和输出数据类型匹配。

您可以查看savedmodel.pbtxt文件。有关您的参数及其输入/输出类型的详细信息。

例如,

我的java代码

Tensor result = s.runner()
        .feed("ParseExample/ParseExample", inputTensor)
        .fetch("dnn/binary_logistic_head/predictions/probabilities")
        .run().get(0);

我的savedModel.pbtxt(部分内容)

node {
  name: "ParseExample/ParseExample"
  op: "ParseExample"
  input: "input_example_tensor"
  input: "ParseExample/ParseExample/names"
  input: "ParseExample/ParseExample/dense_keys_0"
  input: "ParseExample/Const"
  attr {
    key: "Ndense"
    value {
      i: 1
    }
  }
  attr {
    key: "Nsparse"
    value {
      i: 0
    }
  }
  attr {
    key: "Tdense"
    value {
      list {
        type: DT_FLOAT
      }
    }
  }
  attr {
    key: "_output_shapes"
    value {
      list {
        shape {
          dim {
            size: -1
          }
          dim {
            size: 2
          }
        }
      }
    }
  }
  attr {
    key: "dense_shapes"
    value {
      list {
        shape {
          dim {
            size: 2
          }
        }
      }
    }
  }
  attr {
    key: "sparse_types"
    value {
      list {
      }
    }
  }
}
  outputs {
    key: "scores"
    value {
      name: "dnn/binary_logistic_head/predictions/probabilities:0"
      dtype: DT_FLOAT
      tensor_shape {
        dim {
          size: -1
        }
        dim {
          size: 2
        }
      }
    }
  }

它们都兼容我的数据类型float。