如何在java中反序列化DoccatModel对象?

时间:2016-03-03 11:08:51

标签: java

我正在尝试用Java序列化DoccatModel对象(我正在使用OpenNLP算法)。

它能够正确地序列化对象,我将其写入外部文件。但是,我无法将对象反序列化。

我正在尝试使用ObjectInputStream来读取该文件,但它抛出了如下异常:

"java.io.StreamCorruptedException: invalid stream header: 504B0304"

我只想知道如何对DoccatModel对象进行反序列化,以便我可以进一步使用它。

代码如下。[代码仍处于开发阶段,写得比较粗糙,敬请谅解]

/**
 * Trains an OpenNLP document categorizer from database rows, saves the model to disk,
 * loads it back, and writes the predicted category of every row to the database.
 *
 * <p>The model file is produced by {@code DoccatModel.serialize}, which writes OpenNLP's own
 * package format (the file starts with the zip magic bytes {@code 50 4B 03 04}). It is therefore
 * read back through the {@code DoccatModel(InputStream)} constructor and not through
 * {@code ObjectInputStream}, which rejects such a file with
 * {@code StreamCorruptedException: invalid stream header: 504B0304}.
 *
 * @param textField   name of the column whose text is classified
 * @param updateField name of the column that receives the predicted category
 * @param tgtField    name of the column used as the training label
 */
private static void runDocCat(String textField, String updateField, String tgtField)
{
    final String modelPath = "C:\\VOC_Classification\\model.ser";

    DataConfig config;
    try
    {
        config = new DataConfig();
    }
    catch (Exception e)
    {
        // Nothing below can run without the configuration; bail out instead of
        // running into a NullPointerException later (including in the finally block).
        e.printStackTrace();
        return;
    }

    try
    {
        String dbName = config.getdbName();
        String tableName = config.tablename;
        String dataQuery = "SELECT ID, VOC, " + tgtField + " from " + dbName + "." + tableName;
        JSONArray allDesc = config.getJSONArray(dataQuery);

        // 1. Dump the rows into a training file, one DocumentSample per line.
        File trainingFile = new File("voc.train");
        BufferedWriter output = new BufferedWriter(new FileWriter(trainingFile));
        try
        {
            for (int i = 0; i < allDesc.length(); i++)
            {
                String tgt = allDesc.getJSONObject(i).getString(tgtField);
                if (tgt.length() < 3)
                {
                    tgt = "Unknown";
                }
                // The category is written as a single token, so strip the blanks from it.
                tgt = tgt.replaceAll(" ", "");
                String desc = allDesc.getJSONObject(i).getString(textField);
                // Keep every sample on one physical line of the training file.
                desc = desc.replaceAll("\\r", " ").replaceAll("\\n", ".");
                if (!desc.trim().equalsIgnoreCase("nothing"))
                {
                    DocumentSample currDoc = new DocumentSample(tgt, desc);
                    output.write(currDoc.toString());
                    output.newLine();
                }
            }
        }
        finally
        {
            // Close (and flush) the writer even when a row cannot be read.
            output.close();
        }
        System.out.println("Training Data Generated!");

        // 2. Train the model from the file that was just written.
        DoccatModel model;
        ObjectStream<String> lineStream = new PlainTextByLineStream(new FileReader(trainingFile));
        try
        {
            ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream);
            model = DocumentCategorizerME.train("en", sampleStream);
        }
        finally
        {
            lineStream.close();
        }
        System.out.println("Model Data \n\n" + model);

        // 3. Persist the model and read it back from disk.
        saveModel(model, modelPath);
        DoccatModel loadedModel = loadModel(modelPath);
        if (loadedModel == null)
        {
            // Loading failed (already reported); keep going with the in-memory model.
            loadedModel = model;
        }

        System.out.println("\nTraining Done!\n");

        // 4. Categorize every row and store the result.
        DocumentCategorizerME myCategorizer = new DocumentCategorizerME(loadedModel);
        System.out.println(" ---------- Starting categorising process ----------");
        System.out.println("\nProcess Running, Please wait...\n");
        // The rows fetched above are reused: re-running the same SELECT on every iteration
        // was redundant and, without an ORDER BY, could shift which row index i refers to.
        for (int i = 0; i < allDesc.length(); i++)
        {
            String id = allDesc.getJSONObject(i).getString("ID");
            String desc = allDesc.getJSONObject(i).getString(textField);
            String newdesc = desc.replaceAll("\\n", ".").replaceAll("\\r", " ");
            if (!newdesc.trim().equalsIgnoreCase("nothing"))
            {
                double[] outcomes = myCategorizer.categorize(newdesc);
                String category = myCategorizer.getBestCategory(outcomes);
                if (!category.equalsIgnoreCase("Unknown"))
                {
                    // NOTE(review): this statement is built by string concatenation; if
                    // DataConfig offers a parameterized update, prefer it to avoid SQL injection.
                    String updQuery = "UPDATE " + dbName + "." + tableName + " set " + updateField + " = " + "'"
                        + category + "'" + " WHERE ID = " + id;
                    config.executeUpdate(updQuery);
                }
            }
        }

        System.out.println(" ---------- Process Completed Successfully ----------");
        System.out.println("\nCheck your table  \"" + tableName + "\" for results");
        System.out.print("\n\nPress 1 to finish : ");
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
        // Only wait for the user to press Enter; the typed value itself is not used.
        br.readLine();
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
    finally
    {
        try
        {
            config.connectionClose();
        }
        catch (SQLException e)
        {
            e.printStackTrace();
        }
    }
}

/**
 * Writes the model to {@code modelPath} using OpenNLP's own serialization.
 * An I/O failure is reported on stderr and otherwise ignored.
 */
private static void saveModel(DoccatModel model, String modelPath)
{
    OutputStream modelOut = null;
    try
    {
        modelOut = new BufferedOutputStream(new FileOutputStream(modelPath));
        model.serialize(modelOut);
    }
    catch (IOException e)
    {
        e.printStackTrace();
    }
    finally
    {
        if (modelOut != null)
        {
            try
            {
                modelOut.close();
            }
            catch (IOException ex)
            {
                ex.printStackTrace();
            }
        }
    }
}

/**
 * Reads a model previously written by {@code DoccatModel.serialize}.
 *
 * @return the loaded model, or {@code null} when the file cannot be read (the failure is
 *         reported on stderr)
 */
private static DoccatModel loadModel(String modelPath)
{
    FileInputStream fileIn = null;
    try
    {
        fileIn = new FileInputStream(modelPath);
        // The DoccatModel constructor understands the format written by serialize();
        // ObjectInputStream does not.
        return new DoccatModel(fileIn);
    }
    catch (IOException e)
    {
        e.printStackTrace();
        return null;
    }
    finally
    {
        if (fileIn != null)
        {
            try
            {
                fileIn.close();
            }
            catch (IOException ex)
            {
                ex.printStackTrace();
            }
        }
    }
}
}

1 个答案:

答案 0 :(得分:0)

OpenNLP 文档对此说明得不够清楚,但有一种简单的方法可以做到这一点:不要使用 ObjectInputStream,而是直接把文件输入流传给 DoccatModel 的构造函数。

 /**
  * Loads a model that was written with {@code DoccatModel.serialize}.
  *
  * <p>The file is handed to the {@code DoccatModel(InputStream)} constructor; the stream is
  * closed before returning, whether or not loading succeeds.
  *
  * @param modelPath path of the serialized model file
  * @return the model read from {@code modelPath}
  * @throws IOException if the file cannot be opened or read
  */
 private DoccatModel deSerializeModel(String modelPath) throws
        IOException {
    FileInputStream fileInputStream = new FileInputStream(modelPath);
    try {
        return new DoccatModel(fileInputStream);
    } finally {
        // Release the file handle on both the success and the failure path.
        fileInputStream.close();
    }
 }
相关问题