加载到XDocument时如何解析实体?

时间:2009-10-29 18:45:20

标签: c# xml xhtml

我正在尝试将XHTML文档加载到XDocument中,但是遇到了“引用了未声明的实体”的异常。我需要解析 &reg;、&raquo; 等实体。

我相信我的文档是格式良好的(well-formed),这是文档的头部:

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">

当我执行XDocument.Load(<StringReader>)时,就会抛出这些异常。

2 个答案:

答案 0 :(得分:10)

这是结合MSDN文档和一篇博客文章的内容得出的方案。

        XDocument document;

        // DTD processing must be enabled so the entity declarations referenced by
        // the document's DOCTYPE can be resolved. DtdProcessing.Parse is the
        // replacement for the obsolete "ProhibitDtd = false".
        var settings = new XmlReaderSettings
        {
            DtdProcessing = DtdProcessing.Parse,
            XmlResolver = new LocalXhtmlXmlResolver(bool.Parse(ConfigurationManager.AppSettings["CacheDTDs"]))
        };

        // Both readers are disposable; stack the using statements so the XmlReader
        // is released as well as the underlying StringReader.
        using (var stringReader = new StringReader(output))
        using (var xmlReader = XmlReader.Create(stringReader, settings))
        {
            document = XDocument.Load(xmlReader);
        }

    /// <summary>
    /// XML resolver that maps well-known XHTML public identifiers to their W3C DTD
    /// URIs and, when HTTP caching is enabled, downloads "http" resources through a
    /// <see cref="WebRequest"/> configured with <see cref="HttpRequestCacheLevel.Default"/>.
    /// Everything else is delegated to <see cref="XmlUrlResolver"/>.
    /// </summary>
    private class LocalXhtmlXmlResolver : XmlUrlResolver
    {
        // Public identifier -> DTD location. The "-//W3C XHTML ..." spellings (without
        // "//DTD") are kept for backward compatibility alongside the standard forms.
        private static readonly Dictionary<string, Uri> KnownUris = new Dictionary<string, Uri>
        {
            { "-//W3C//DTD XHTML 1.0 Strict//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd") },
            { "-//W3C XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
            { "-//W3C//DTD XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
            { "-//W3C XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
            { "-//W3C//DTD XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
            { "-//W3C//DTD XHTML 1.1//EN", new Uri("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd") }
        };

        private readonly bool enableHttpCaching;
        private ICredentials credentials;

        /// <summary>Creates the resolver.</summary>
        /// <param name="enableHttpCaching">
        /// When true, "http" resources are fetched via a cache-aware <see cref="WebRequest"/>
        /// instead of the base resolver.
        /// </param>
        public LocalXhtmlXmlResolver(bool enableHttpCaching)
        {
            this.enableHttpCaching = enableHttpCaching;
        }

        /// <summary>
        /// Captures the credentials so the cached HTTP path in <see cref="GetEntity"/> can
        /// apply them; they are also forwarded to the base resolver.
        /// </summary>
        public override ICredentials Credentials
        {
            set
            {
                this.credentials = value;
                base.Credentials = value;
            }
        }

        /// <summary>
        /// Returns the DTD URI registered for a known public identifier; otherwise
        /// falls back to the base resolution of <paramref name="relativeUri"/>.
        /// </summary>
        /// <param name="baseUri">Base URI used by the fallback resolution.</param>
        /// <param name="relativeUri">Public identifier or URI to resolve; may be null.</param>
        /// <returns>The resolved URI.</returns>
        public override Uri ResolveUri(Uri baseUri, string relativeUri)
        {
            // A null key would make the dictionary lookup throw, so skip the table
            // and let the base class decide how to handle a missing relative URI.
            if (relativeUri != null)
            {
                Uri known;
                if (KnownUris.TryGetValue(relativeUri, out known))
                {
                    return known;
                }

                Debug.WriteLine("Could not find: " + relativeUri);
            }

            return base.ResolveUri(baseUri, relativeUri);
        }

        /// <summary>
        /// Opens the resource at <paramref name="absoluteUri"/>. "http" URIs are fetched
        /// through the cache-aware request when caching is enabled and a stream (or no
        /// specific type) is requested; all other cases use the base implementation.
        /// </summary>
        /// <param name="absoluteUri">Absolute URI of the resource.</param>
        /// <param name="role">Passed through to the base implementation.</param>
        /// <param name="ofObjectToReturn">Requested return type; null or <see cref="Stream"/> for the cached path.</param>
        /// <returns>The response stream, or whatever the base implementation returns.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="absoluteUri"/> is null.</exception>
        public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
        {
            if (absoluteUri == null)
            {
                throw new ArgumentNullException("absoluteUri");
            }

            // Resolve resources from the HTTP cache (if possible).
            bool wantsStream = ofObjectToReturn == null || ofObjectToReturn == typeof(Stream);
            if (absoluteUri.Scheme == "http" && this.enableHttpCaching && wantsStream)
            {
                var request = WebRequest.Create(absoluteUri);

                request.CachePolicy = new HttpRequestCachePolicy(HttpRequestCacheLevel.Default);

                if (this.credentials != null)
                {
                    request.Credentials = this.credentials;
                }

                // The response is not disposed here: the returned stream has to stay
                // open for the caller that reads the entity.
                var response = request.GetResponse();

                return response.GetResponseStream();
            }

            // Otherwise use the default behavior of XmlUrlResolver (resolve from source).
            return base.GetEntity(absoluteUri, role, ofObjectToReturn);
        }
    }

答案 1 :(得分:8)

我和Dave遇到了同样的问题,并且找到了这个对我帮助很大的问答。根据Dave的回答和Pavel的优化建议,我更新了这个类。现在,DTD可以存储为嵌入式资源,并在必要时加载。我知道这个帖子已经有几年了,但也许这可以帮助到别人。

使用示例:

// Enable DTD parsing and route DTD/entity lookups through the local resolver.
var parseSettings = new XmlReaderSettings();
parseSettings.DtdProcessing = DtdProcessing.Parse;
parseSettings.XmlResolver = new LocalXhtmlXmlResolver();

// The reader is disposed as soon as the document has been processed.
using (var reader = XmlReader.Create(xhtmlStream, parseSettings))
{
    var xhtml = XDocument.Load(reader);
    ...
}

LocalXhtmlXmlResolver类:

/// <summary>
/// XML resolver that serves the XHTML DTDs and entity sets from resources embedded
/// in this assembly. Known public identifiers are mapped to their W3C DTD URIs, and
/// known URIs are answered with the matching embedded resource stream; anything
/// else is delegated to <see cref="XmlUrlResolver"/>.
/// </summary>
public class LocalXhtmlXmlResolver : XmlUrlResolver
{
    // Prefix of the embedded resource names; adjust to the namespace/folder the
    // DTD files are embedded under.
    private const string ResourcePrefix = "Your.Namespace.Here.";

    // URI (as it appears in Uri.OriginalString) -> embedded resource name.
    private static readonly Dictionary<string, string> _knownDtds = new Dictionary<string, string>
        {
            { "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd", ResourcePrefix + "xhtml1-strict.dtd" },
            { "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd", ResourcePrefix + "xhtml1-transitional.dtd" },
            { "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd", ResourcePrefix + "xhtml1-frameset.dtd" },
            { "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd", ResourcePrefix + "xhtml11.dtd" },
            { "http://www.w3.org/TR/xhtml1/DTD/-//W3C//ENTITIES Latin 1 for XHTML//EN", ResourcePrefix + "xhtml-lat1.ent" },
            { "http://www.w3.org/TR/xhtml1/DTD/-//W3C//ENTITIES Special for XHTML//EN", ResourcePrefix + "xhtml-special.ent" },
            { "http://www.w3.org/TR/xhtml1/DTD/-//W3C//ENTITIES Symbols for XHTML//EN", ResourcePrefix + "xhtml-symbol.ent" }
        };

    // Public identifier -> DTD location. The "-//W3C XHTML ..." spellings (without
    // "//DTD") are kept for backward compatibility alongside the standard forms.
    private static readonly Dictionary<string, Uri> _knownUris = new Dictionary<string, Uri>
        {
            { "-//W3C//DTD XHTML 1.0 Strict//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd") },
            { "-//W3C XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
            { "-//W3C//DTD XHTML 1.0 Transitional//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd") },
            { "-//W3C XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
            { "-//W3C//DTD XHTML 1.0 Frameset//EN", new Uri("http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd") },
            { "-//W3C//DTD XHTML 1.1//EN", new Uri("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd") }
        };

    /// <summary>
    /// Returns the DTD URI registered for a known public identifier; otherwise
    /// falls back to the base resolution of <paramref name="relativeUri"/>.
    /// </summary>
    /// <param name="baseUri">Base URI used by the fallback resolution.</param>
    /// <param name="relativeUri">Public identifier or URI to resolve; may be null.</param>
    /// <returns>The resolved URI.</returns>
    public override Uri ResolveUri(Uri baseUri, string relativeUri)
    {
        // A null key would make the dictionary lookup throw, so only consult the
        // table for non-null input and let the base class handle the rest.
        Uri knownUri;
        if (relativeUri != null && _knownUris.TryGetValue(relativeUri, out knownUri))
        {
            return knownUri;
        }

        return base.ResolveUri(baseUri, relativeUri);
    }

    /// <summary>
    /// Returns the embedded resource stream for a known DTD/entity URI, or defers to
    /// the base implementation for any other URI.
    /// </summary>
    /// <param name="absoluteUri">Absolute URI of the resource.</param>
    /// <param name="role">Passed through to the base implementation.</param>
    /// <param name="ofObjectToReturn">Passed through to the base implementation.</param>
    /// <returns>
    /// The manifest resource stream for known URIs (null when no resource with the
    /// mapped name is embedded in the assembly), otherwise the base result.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="absoluteUri"/> is null.</exception>
    public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
    {
        if (absoluteUri == null)
        {
            throw new ArgumentNullException("absoluteUri");
        }

        string resourceName;
        if (_knownDtds.TryGetValue(absoluteUri.OriginalString, out resourceName))
        {
            Assembly assembly = typeof(LocalXhtmlXmlResolver).Assembly;
            return assembly.GetManifestResourceStream(resourceName);
        }

        return base.GetEntity(absoluteUri, role, ofObjectToReturn);
    }
}