Unable to decode

Date: 2019-04-04 05:21:05

Tags: android

I am trying to run a speech recognition model in a mobile app using TensorFlow, and I am running into the following problem: when I speak into the app, it extracts and prints the MFCC features, but then it crashes with the error below:

04-03 12:56:16.754 24654-24815/org.tensorflow.demo E/TensorFlowInferenceInterface: Failed to run TensorFlow inference with inputs:[SeqLen], outputs:[SparseToDense]

--------- beginning of crash
04-03 12:56:16.755 24654-24815/org.tensorflow.demo E/AndroidRuntime: FATAL EXCEPTION: Thread-7556
Process: org.tensorflow.demo, PID: 24654
java.lang.IllegalArgumentException: Expects arg[0] to be int32 but float is provided
at org.tensorflow.Session.run(Native Method)
at org.tensorflow.Session.access$100(Session.java:48)
at org.tensorflow.Session$Runner.runHelper(Session.java:314)
at org.tensorflow.Session$Runner.run(Session.java:264)
at org.tensorflow.contrib.android.TensorFlowInferenceInterface.run(TensorFlowInferenceInterface.java:228)
at org.tensorflow.contrib.android.TensorFlowInferenceInterface.run(TensorFlowInferenceInterface.java:197)
at org.tensorflow.contrib.android.TensorFlowInferenceInterface.run(TensorFlowInferenceInterface.java:187)
at org.tensorflow.demo.SpeechActivity.recognize(SpeechActivity.java:229)
at org.tensorflow.demo.SpeechActivity.access$100(SpeechActivity.java:48)
at org.tensorflow.demo.SpeechActivity$3.run(SpeechActivity.java:193)
at java.lang.Thread.run(Thread.java:818)
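As far as I understand the exception, arg[0] is the first input I feed, SeqLen, which the graph apparently declares as int32, while my code hands it a float array. Since TensorFlowInferenceInterface.feed is overloaded by element type, feeding an int32 placeholder should look roughly like the sketch below (the name is taken from my code, but the shape is just my guess):

// Hypothetical sketch: SeqLen would need an int32 tensor, not floats.
inferenceInterface.feed("SeqLen", new int[] {157}, 1);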

Here is my code:

/**
 * An activity that listens for audio and then uses a pretrained model to detect speech content,
 * and turn it into text.
 */
public class SpeechActivity extends Activity {

  // Constants that control the behavior of the recognition code and model
  // settings.
  private static final int SAMPLE_RATE = 16000;
  private static final int SAMPLE_DURATION_MS = 5000;
  private static final int RECORDING_LENGTH = (int) (SAMPLE_RATE * SAMPLE_DURATION_MS / 1000);
  private static final String MODEL_FILENAME = "file:///android_asset/april_model.pb"; // my model
  private static final String INPUT_DATA_NAME = "SeqLen";
  private static final String OUTPUT_SCORES_NAME = "SparseToDense";

  private static final char[] map = new char[]{'0', ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
          'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
          'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'};

  // UI elements.
  private static final int REQUEST_RECORD_AUDIO = 13;
  private Button startButton;
  private TextView outputText;
  private static final String LOG_TAG = SpeechActivity.class.getSimpleName();

  // Working variables.
  short[] recordingBuffer = new short[RECORDING_LENGTH];
  int recordingOffset = 0;
  boolean shouldContinue = true;
  private Thread recordingThread;
  boolean shouldContinueRecognition = true;
  private Thread recognitionThread;
  private final ReentrantLock recordingBufferLock = new ReentrantLock();
  private TensorFlowInferenceInterface inferenceInterface;

  @Override
  protected void onCreate(Bundle savedInstanceState) {
    // Set up the UI.
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_speech);
    startButton = (Button) findViewById(R.id.start);
    startButton.setOnClickListener(
            new View.OnClickListener() {
              @Override
              public void onClick(View view) {

                startRecording();
              }
            });
    outputText = (TextView) findViewById(R.id.output_text);


    // Load the pretrained WaveNet model.
    inferenceInterface = new TensorFlowInferenceInterface(getAssets(), MODEL_FILENAME);

    requestMicrophonePermission();
  }

  private void requestMicrophonePermission() {
    requestPermissions(
            new String[] {android.Manifest.permission.RECORD_AUDIO}, REQUEST_RECORD_AUDIO);
  }

  @Override
  public void onRequestPermissionsResult(
          int requestCode, String[] permissions, int[] grantResults) {
    if (requestCode == REQUEST_RECORD_AUDIO
            && grantResults.length > 0
            && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
    }
  }

  public synchronized void startRecording() {
    if (recordingThread != null) {
      return;
    }
    shouldContinue = true;
    recordingThread =
            new Thread(
                    new Runnable() {
                      @Override
                      public void run() {
                        record();
                      }
                    });
    recordingThread.start();
  }

  private void record() {
    android.os.Process.setThreadPriority(android.os.Process.THREAD_PRIORITY_AUDIO);

    // Estimate the buffer size we'll need for this device.
    int bufferSize =
            AudioRecord.getMinBufferSize(
                    SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT);
    if (bufferSize == AudioRecord.ERROR || bufferSize == AudioRecord.ERROR_BAD_VALUE) {
      bufferSize = SAMPLE_RATE * 2;
    }
    short[] audioBuffer = new short[bufferSize / 2];

    AudioRecord record =
            new AudioRecord(
                    MediaRecorder.AudioSource.DEFAULT,
                    SAMPLE_RATE,
                    AudioFormat.CHANNEL_IN_MONO,
                    AudioFormat.ENCODING_PCM_16BIT,
                    bufferSize);

    if (record.getState() != AudioRecord.STATE_INITIALIZED) {
      Log.e(LOG_TAG, "Audio Record can't initialize!");
      return;
    }

    record.startRecording();

    Log.v(LOG_TAG, "Start recording");


    while (shouldContinue) {
      int numberRead = record.read(audioBuffer, 0, audioBuffer.length);
      Log.v(LOG_TAG, "read: " + numberRead);
      int maxLength = recordingBuffer.length;
      recordingBufferLock.lock();
      try {
        if (recordingOffset + numberRead < maxLength) {
          System.arraycopy(audioBuffer, 0, recordingBuffer, recordingOffset, numberRead);
        } else {
          shouldContinue = false;
        }
        recordingOffset += numberRead;
      } finally {
        recordingBufferLock.unlock();
      }
    }
    record.stop();
    record.release();
    startRecognition();
  }

  public synchronized void startRecognition() {
    if (recognitionThread != null) {
      return;
    }
    shouldContinueRecognition = true;
    recognitionThread =
            new Thread(
                    new Runnable() {
                      @Override
                      public void run() {
                        recognize();
                      }
                    });
    recognitionThread.start();
  }

  private void recognize() {
    Log.v(LOG_TAG, "Start recognition");

    short[] inputBuffer = new short[RECORDING_LENGTH];
    double[] doubleInputBuffer = new double[RECORDING_LENGTH];
    long[] outputScores = new long[157];
    String[] outputScoresNames = {OUTPUT_SCORES_NAME};


    recordingBufferLock.lock();
    try {
      int maxLength = recordingBuffer.length;
      System.arraycopy(recordingBuffer, 0, inputBuffer, 0, maxLength);
    } finally {
      recordingBufferLock.unlock();
    }

    // We need to feed in float values between -1.0 and 1.0, so divide the
    // signed 16-bit inputs.
    for (int i = 0; i < RECORDING_LENGTH; ++i) {
      doubleInputBuffer[i] = inputBuffer[i] / 32767.0;
    }

    //MFCC java library.
    MFCC mfccConvert = new MFCC();
    float[] mfccInput = mfccConvert.process(doubleInputBuffer);
    Log.v(LOG_TAG, "MFCC Input======> " + Arrays.toString(mfccInput));

    // Run the model.
    inferenceInterface.feed(INPUT_DATA_NAME, mfccInput, 1, 157, 20);
    inferenceInterface.run(outputScoresNames); //This is where the error message points
    inferenceInterface.fetch(OUTPUT_SCORES_NAME, outputScores);
    Log.v(LOG_TAG, "OUTPUT======> " + Arrays.toString(outputScores));


    //Output the result.
    String result = "";
    for (int i = 0; i < outputScores.length; i++) {
      if (outputScores[i] == 0)
        break;
      result += map[(int) outputScores[i]];
    }
    final String r = result;
    this.runOnUiThread(new Runnable() {
      @Override
      public void run() {
        outputText.setText(r);
      }
    });

    Log.v(LOG_TAG, "End recognition: " +result);
  }

}
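My best guess at the root cause: in recognize() I feed the float MFCC array to SeqLen, which, judging by its name and the exception, is an int32 sequence-length input, and I never feed the actual feature tensor at all. Here is a minimal sketch of what I suspect the feed/run/fetch section should look like; "input_node" is only a stand-in for whatever my graph's real data input is called, and the shapes are assumptions:

// Hypothetical: feed the MFCC features to the (unknown) float data input...
inferenceInterface.feed("input_node", mfccInput, 1, 157, 20);
// ...and feed the number of time steps to SeqLen as an int32 tensor.
inferenceInterface.feed(INPUT_DATA_NAME, new int[] {157}, 1);
inferenceInterface.run(outputScoresNames);
inferenceInterface.fetch(OUTPUT_SCORES_NAME, outputScores);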

Can someone help me solve this problem?

Here is the link to the Android code I referenced: https://github.com/chiachunfu/speech (I simply replaced the stock model they use with my own).

0 Answers
