我正在使用此语法解析中文文本。该程序可以完美运行,但是在pipline.xmlPrint之后,该文本将转换为问号(?)符号。我不知道我在做什么错。对于英语,即使转换为xml,它也可以正常工作。
StanfordCoreNLP pipeline = null;
void Load()
{
// Annotation pipeline configuration
var props = new Properties();
props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, depparse");
props.setProperty("pos.model", @"chinese-distsim.tagger");
props.setProperty("parse.model", @"chineseFactored.ser.gz");
props.setProperty("depparse.language", "chinese");
props.setProperty("inputEncoding", "UTF-8");
props.setProperty("ner.useSUTime", "0");
props.setProperty("threads", "4");
pipeline = new StanfordCoreNLP(props);
}
public string ParseText(string textToParse)
{
try
{
if (pipeline == null)
{
Load();
}
// Annotation
Annotation annotation = new Annotation(textToParse);
pipeline.annotate(annotation);
var streamOut = new ByteArrayOutputStream();
pipeline.xmlPrint(annotation, new PrintWriter(streamOut));
string textOfXml = streamOut.toString();
return textOfXml;
}
catch (System.OutOfMemoryException ome)
{
string err = String.Format("OutOfMemoryException in SentenceParser.ParseText() ; Msg:{0} Stack:{1} File[{2}]",
ome.Message, ome.StackTrace, textToParse.Length > 90 ? textToParse.Substring(0, 90) : textToParse);
return "RESTART";
}
catch (Exception ex)
{
string err = String.Format("Exception in SentenceParser.ParseText() ; Msg:{0} Stack:{1} File[{2}]", ex.Message, ex.StackTrace, textToParse.Length > 90 ? textToParse.Substring(0, 90) : textToParse);
Console.WriteLine(err);
return null;
}
}