我正在为通用视频创建字幕生成器。 其中一个主要阻碍者是让每个单词的时间戳与视频对齐,这有点让我感到害怕。结果类有一个getTimedBestResult()函数,它应该以word(timestamp)的形式返回一些东西,例如:(0:20-0:22)。在我调用它的那一刻,我得到了一个输出,在“WordToken one null”的条款中,其中一个是被识别的单词,null应该是时间戳。我已阅读文档,目前无法找到解决方案。
String resultText = result.getTimedBestResult(false,false);
我使用hub4模型和CMUdict0.6,如果有任何重要性,请使用 wordPruningBreadthFirstSearchManager(lexTree语言学家)。 我不知道我是否在config.xml中做错了什么,
<?xml version="1.0" encoding="UTF-8"?>
<!-- ********************************************************
Sphinx-4 Configuration file
********************************************************
-->
<config>
<!-- ******************************************************** -->
<!-- frequently tuned properties -->
<!-- ******************************************************** -->
<property name="absoluteBeamWidth" value="10000"/>
<property name="relativeBeamWidth" value="1E-80"/>
<property name="absoluteWordBeamWidth" value="20"/>
<property name="relativeWordBeamWidth" value="1E-60"/>
<property name="wordInsertionProbability" value="0.2"/>
<property name="languageWeight" value="10.5"/>
<property name="silenceInsertionProbability" value=".05"/>
<property name="frontend" value="epFrontEnd"/>
<property name="recognizer" value="recognizer"/>
<property name="showCreations" value="false"/>
<config>
<property name="logLevel" value="SEVERE"/>
</config>
<!-- ************************************************** -->
<!-- Batch mode -->
<!-- ************************************************** -->
<component name="batch" type="edu.cmu.sphinx.tools.batch.BatchModeRecognizer">
<propertylist name="inputDataProcessors">
<item>audioFileDataSource</item>
</propertylist>
<property name="skip" value="0"/>
<property name="recognizer" value="${recognizer}"/>
</component>
<!-- ******************************************************** -->
<!-- word recognizer configuration -->
<!-- ******************************************************** -->
<component name="recognizer" type="edu.cmu.sphinx.recognizer.Recognizer">
<property name="decoder" value="decoder"/>
<propertylist name="monitors">
<item>accuracyTracker </item>
<item>speedTracker </item>
<item>memoryTracker </item>
<item>recognizerMonitor </item>
</propertylist>
</component>
<!-- ******************************************************** -->
<!-- The Decoder configuration -->
<!-- ******************************************************** -->
<component name="decoder" type="edu.cmu.sphinx.decoder.Decoder">
<property name="searchManager" value="wordPruningSearchManager"/>
</component>
<!-- ******************************************************** -->
<!-- The Search Manager -->
<!-- ******************************************************** -->
<component name="wordPruningSearchManager" type="edu.cmu.sphinx.decoder.search.WordPruningBreadthFirstSearchManager">
<property name="logMath" value="logMath"/>
<property name="linguist" value="lexTreeLinguist"/>
<property name="pruner" value="trivialPruner"/>
<property name="scorer" value="threadedScorer"/>
<property name="activeListManager" value="activeListManager"/>
<property name="growSkipInterval" value="0"/>
<property name="checkStateOrder" value="false"/>
<property name="buildWordLattice" value="false"/>
<property name="maxLatticeEdges" value="3"/>
<property name="acousticLookaheadFrames" value="1.7"/>
<property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
</component>
<!-- ******************************************************** -->
<!-- The Active Lists -->
<!-- ******************************************************** -->
<component name="activeListManager" type="edu.cmu.sphinx.decoder.search.SimpleActiveListManager">
<propertylist name="activeListFactories">
<item>standardActiveListFactory</item>
<item>wordActiveListFactory</item>
<item>wordActiveListFactory</item>
<item>standardActiveListFactory</item>
<item>standardActiveListFactory</item>
<item>standardActiveListFactory</item>
</propertylist>
</component>
<component name="standardActiveListFactory" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
<property name="logMath" value="logMath"/>
<property name="absoluteBeamWidth" value="${absoluteBeamWidth}"/>
<property name="relativeBeamWidth" value="${relativeBeamWidth}"/>
</component>
<component name="wordActiveListFactory" type="edu.cmu.sphinx.decoder.search.PartitionActiveListFactory">
<property name="logMath" value="logMath"/>
<property name="absoluteBeamWidth" value="${absoluteWordBeamWidth}"/>
<property name="relativeBeamWidth" value="${relativeWordBeamWidth}"/>
</component>
<!-- ******************************************************** -->
<!-- The Pruner -->
<!-- ******************************************************** -->
<component name="trivialPruner" type="edu.cmu.sphinx.decoder.pruner.SimplePruner"/>
<!-- ******************************************************** -->
<!-- The Scorer -->
<!-- ******************************************************** -->
<component name="threadedScorer" type="edu.cmu.sphinx.decoder.scorer.ThreadedAcousticScorer">
<property name="frontend" value="${frontend}"/>
<property name="isCpuRelative" value="false"/>
<property name="numThreads" value="10"/>
<property name="minScoreablesPerThread" value="10"/>
<property name="scoreablesKeepFeature" value="false"/>
</component>
<!-- ******************************************************** -->
<!-- The linguist configuration -->
<!-- ******************************************************** -->
<component name="lexTreeLinguist" type="edu.cmu.sphinx.linguist.lextree.LexTreeLinguist">
<property name="logMath" value="logMath"/>
<property name="acousticModel" value="wsj"/>
<property name="languageModel" value="language"/>
<property name="dictionary" value="dict"/>
<property name="addFillerWords" value="false"/>
<property name="fillerInsertionProbability" value="1E-10"/>
<property name="generateUnitStates" value="true"/>
<property name="wantUnigramSmear" value="true"/>
<property name="unigramSmearWeight" value="1"/>
<property name="wordInsertionProbability" value="${wordInsertionProbability}"/>
<property name="silenceInsertionProbability" value="${silenceInsertionProbability}"/>
<property name="languageWeight" value="${languageWeight}"/>
<property name="unitManager" value="unitManager"/>
</component>
<!-- ******************************************************** -->
<!-- The Dictionary configuration -->
<!-- ******************************************************** -->
<component name="dict" type="edu.cmu.sphinx.linguist.dictionary.FullDictionary">
<property name="dictionaryPath" value="file:///C:/Users/Asgard/Documents/workspace/AsgardGit/asgard/dict/cmudict.06d.dict"/>
<property name="fillerPath" value="file:///C:/Users/Asgard/Documents/workspace/AsgardGit/asgard/dict/filler.filler"/>
<property name="addSilEndingPronunciation" value="false"/>
<property name="wordReplacement" value="<sil>"/>
<property name="allowMissingWords" value="false"/>
<property name="unitManager" value="unitManager"/>
</component>
<!-- ******************************************************** -->
<!-- The Language Model configuration -->
<!-- ******************************************************** -->
<component name="language" type="edu.cmu.sphinx.linguist.language.ngram.large.LargeTrigramModel">
<property name="logMath" value="logMath"/>
<property name="maxDepth" value="3"/>
<property name="unigramWeight" value=".5"/>
<property name="dictionary" value="dict"/>
<property name="location" value="file:///C:/Users/Asgard/Documents/workspace/lang/language_model.arpaformat.DMP"/>
</component>
<!-- ******************************************************** -->
<!-- The acoustic model configuration-->
<!-- ******************************************************** -->
<component name="wsj" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.TiedStateAcousticModel">
<property name="loader" value="wsjLoader"/>
<property name="unitManager" value="unitManager"/>
</component>
<component name="wsjLoader" type="edu.cmu.sphinx.linguist.acoustic.tiedstate.Sphinx3Loader">
<property name="logMath" value="logMath"/>
<property name="unitManager" value="unitManager"/>
<property name="location" value="file:///C:/Users/Asgard/Documents/workspace/acc/hub4_cd_continuous_8gau_1s_c_d_dd/"/>
</component>
<!-- ******************************************************** -->
<!-- The unit manager configuration -->
<!-- ******************************************************** -->
<component name="unitManager" type="edu.cmu.sphinx.linguist.acoustic.UnitManager"/>
<!-- ******************************************************** -->
<!-- The frontend configuration -->
<!-- ******************************************************** -->
<component name="epFrontEnd" type="edu.cmu.sphinx.frontend.FrontEnd">
<propertylist name="pipeline">
<item>audioFileDataSource </item>
<item>dataBlocker </item>
<item>speechClassifier </item>
<item>speechMarker </item>
<item>nonSpeechDataFilter </item>
<item>preemphasizer </item>
<item>windower </item>
<item>fft </item>
<item>melFilterBank </item>
<item>dct </item>
<item>liveCMN </item>
<item>featureExtraction </item>
</propertylist>
</component>
<component name="audioFileDataSource" type="edu.cmu.sphinx.frontend.util.AudioFileDataSource"/>
<component name="dataBlocker" type="edu.cmu.sphinx.frontend.DataBlocker"/>
<component name="speechClassifier" type="edu.cmu.sphinx.frontend.endpoint.SpeechClassifier"/>
<component name="nonSpeechDataFilter"
type="edu.cmu.sphinx.frontend.endpoint.NonSpeechDataFilter"/>
<component name="speechMarker" type="edu.cmu.sphinx.frontend.endpoint.SpeechMarker" />
<component name="preemphasizer"
type="edu.cmu.sphinx.frontend.filter.Preemphasizer"/>
<component name="windower"
type="edu.cmu.sphinx.frontend.window.RaisedCosineWindower">
</component>
<component name="fft"
type="edu.cmu.sphinx.frontend.transform.DiscreteFourierTransform">
</component>
<component name="melFilterBank"
type="edu.cmu.sphinx.frontend.frequencywarp.MelFrequencyFilterBank">
</component>
<component name="dct"
type="edu.cmu.sphinx.frontend.transform.DiscreteCosineTransform"/>
<component name="liveCMN"
type="edu.cmu.sphinx.frontend.feature.LiveCMN"/>
<component name="featureExtraction"
type="edu.cmu.sphinx.frontend.feature.DeltasFeatureExtractor"/>
<!-- ******************************************************* -->
<!-- monitors -->
<!-- ******************************************************* -->
<component name="accuracyTracker" type="edu.cmu.sphinx.instrumentation.BestConfidenceAccuracyTracker">
<property name="confidenceScorer" value="confidenceScorer"/>
<property name="recognizer" value="${recognizer}"/>
<property name="showRawResults" value="true"/>
<property name="showAlignedResults" value="true"/>
</component>
<component name="confidenceScorer" type="edu.cmu.sphinx.result.SausageMaker"/>
<component name="memoryTracker" type="edu.cmu.sphinx.instrumentation.MemoryTracker">
<property name="recognizer" value="${recognizer}"/>
<property name="showDetails" value="false"/>
<property name="showSummary" value="false"/>
</component>
<component name="speedTracker" type="edu.cmu.sphinx.instrumentation.SpeedTracker">
<property name="recognizer" value="${recognizer}"/>
<property name="frontend" value="${frontend}"/>
<property name="showDetails" value="false"/>
</component>
<component name="recognizerMonitor" type="edu.cmu.sphinx.instrumentation.RecognizerMonitor">
<property name="recognizer" value="${recognizer}"/>
<propertylist name="allocatedMonitors">
<item>configMonitor</item>
</propertylist>
</component>
<component name="configMonitor" type="edu.cmu.sphinx.instrumentation.ConfigMonitor">
<property name="showConfig" value="false"/>
</component>
<!-- ******************************************************* -->
<!-- Miscellaneous components -->
<!-- ******************************************************* -->
<component name="logMath" type="edu.cmu.sphinx.util.LogMath">
<property name="logBase" value="1.0001"/>
<property name="useAddTable" value="true"/>
</component>
</config>
感谢您的回复