如何使用Stanford Parser在C#中正确解析中文文本?

时间:2019-05-22 15:43:27

标签: c# stanford-nlp

我正在使用此语法解析中文文本。该程序可以完美运行,但是在pipline.xmlPrint之后,该文本将转换为问号(?)符号。我不知道我在做什么错。对于英语,即使转换为xml,它也可以正常工作。

StanfordCoreNLP pipeline = null;

        void Load()
        {
            // Annotation pipeline configuration
            var props = new Properties();
            props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, depparse");            
            props.setProperty("pos.model", @"chinese-distsim.tagger");
            props.setProperty("parse.model", @"chineseFactored.ser.gz");
            props.setProperty("depparse.language", "chinese");
            props.setProperty("inputEncoding", "UTF-8");
            props.setProperty("ner.useSUTime", "0");
            props.setProperty("threads", "4");
            pipeline = new StanfordCoreNLP(props);
        }
public string ParseText(string textToParse)
        {
            try
            {
                if (pipeline == null)
                {
                    Load();
                }
                // Annotation
                Annotation annotation = new Annotation(textToParse);
                pipeline.annotate(annotation);
                var streamOut = new ByteArrayOutputStream();
                pipeline.xmlPrint(annotation, new PrintWriter(streamOut));
                string textOfXml = streamOut.toString();
                return textOfXml;
            }
            catch (System.OutOfMemoryException ome)
            {
                string err = String.Format("OutOfMemoryException in SentenceParser.ParseText() ; Msg:{0} Stack:{1} File[{2}]",
                    ome.Message, ome.StackTrace, textToParse.Length > 90 ? textToParse.Substring(0, 90) : textToParse);
                return "RESTART";
            }
            catch (Exception ex)
            {
                string err = String.Format("Exception in SentenceParser.ParseText() ; Msg:{0} Stack:{1} File[{2}]", ex.Message, ex.StackTrace, textToParse.Length > 90 ? textToParse.Substring(0, 90) : textToParse);
                Console.WriteLine(err);
                return null;
            }
        }

0 个答案:

没有答案