从XmlReader生成XPath

时间:2015-08-03 07:27:56

标签: c# xml xpath

我正在编写一个应用程序,它从各种源解析动态xml并遍历XML并返回所有唯一元素。

鉴于Xml文件有时非常大,出于内存限制的考虑,我使用XmlReader来解析Xml结构。

/// <summary>
/// Streams through the XML document at <paramref name="filePath"/> and records every
/// distinct element local name together with the reader depth at which it first appeared.
/// </summary>
/// <param name="filePath">Path or URI of the XML document, passed to <c>XmlReader.Create</c>.</param>
/// <returns>A map from element local name to the depth of its first occurrence.</returns>
public IDictionary<string, int> Discover(string filePath)
    {
        // The value type has to be int: reader.Depth is an int and the method
        // returns IDictionary<string, int>.
        Dictionary<string, int> nodeTable = new Dictionary<string, int>();
        using (XmlReader reader = XmlReader.Create(filePath))
        {
            // Read() returns false once the end of the document is reached.
            while (reader.Read())
            {
                if (reader.NodeType == XmlNodeType.Element && !nodeTable.ContainsKey(reader.LocalName))
                {
                    // Only the first occurrence of a name is kept.
                    nodeTable.Add(reader.LocalName, reader.Depth);
                }
            }
        }
        Debug.WriteLine("The node table has {0} items.", nodeTable.Count);

        return nodeTable;
    }

这段代码运行得很好,性能良好且高效,但最后一部分让我难以解决:我正在尝试为每个元素生成XPath。

起初,用类似下面这样的代码似乎很容易实现。

// Names of the elements that are currently open, innermost element on top.
var elements = new Stack<string>();

while (reader.Read())
{
    switch (reader.NodeType)
    {
        case XmlNodeType.Element:
            // A self-closing element (<x/>) is never followed by an EndElement node,
            // so pushing it would leave a stale name on the stack and corrupt
            // every path built afterwards.
            if (!reader.IsEmptyElement)
            {
                elements.Push(reader.LocalName);
            }
            break;
        case XmlNodeType.EndElement:
            elements.Pop();
            break;
        case XmlNodeType.Text:
            // Stack<T> enumerates from the top down; reverse it to get root-to-leaf order.
            path = string.Join("/", elements.Reverse());
            break;
    }
}

但这只能给我一部分解决方案,因为我希望为树中每个包含数据的节点返回XPath,并检测给定的节点树是否包含嵌套的数据集。

<customers>
  <customer id="2">
    <name>ted smith</name>
    <addresses>
      <address1>
           <line1></line1>
      </address1>
      <address2>
           <line1></line1>
           <line2></line2>
      </address2>
    </addresses>
  </customer>
  <customer id="322">
    <name>smith mcsmith</name>
    <addresses>
      <address1>
           <line1></line1>
           <line2></line2>
      </address1>
      <address2>
           <line1></line1>
           <line2></line2>
      </address2>
    </addresses>
  </customer>
</customers>

请记住,数据是完全动态的,架构是未知的。

所以输出应该包括

/customer/name
/customer/address1/line1
/customer/address1/line2
/customer/address2/line1
/customer/address2/line2

1 个答案:

答案 0 :(得分:1)

我喜欢使用递归方法而不是push / pop。见下面的代码

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.IO;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console demo that walks a sample XML document with a forward-only
    /// <see cref="XmlReader"/> and builds an in-memory <see cref="Node"/> tree recursively.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Parses the hard-coded sample document into a <see cref="Node"/> tree.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string input =
                "<customers>" +
                  "<customer id=\"2\">" +
                    "<name>ted smith</name>" +
                    "<addresses>" +
                      "<address1>" +
                           "<line1></line1>" +
                      "</address1>" +
                      "<address2>" +
                           "<line1></line1>" +
                           "<line2></line2>" +
                      "</address2>" +
                    "</addresses>" +
                  "</customer>" +
                  "<customer id=\"322\">" +
                    "<name>smith mcsmith</name>" +
                    "<addresses>" +
                      "<address1>" +
                           "<line1></line1>" +
                           "<line2></line2>" +
                      "</address1>" +
                      "<address2>" +
                           "<line1></line1>" +
                           "<line2></line2>" +
                      "</address2>" +
                    "</addresses>" +
                  "</customer>" +
                "</customers>";

            Node root = new Node();

            // Both readers are IDisposable; release them deterministically.
            using (StringReader sReader = new StringReader(input))
            using (XmlReader reader = XmlReader.Create(sReader))
            {
                ReadNode(reader, root);
            }
        }

        /// <summary>
        /// Reads nodes from <paramref name="reader"/> into <paramref name="node"/> until the
        /// end tag of the current element or the end of the document is reached.
        /// Nested elements are added to <c>node.children</c> and filled by recursion.
        /// </summary>
        /// <param name="reader">Reader positioned before the content to consume.</param>
        /// <param name="node">
        /// Node to populate. If its name is still empty, the first element encountered
        /// names the node itself instead of becoming a child.
        /// </param>
        /// <returns>
        /// <c>true</c> if reading stopped at an end tag; <c>false</c> if the reader ran out of input.
        /// </returns>
        static bool ReadNode(XmlReader reader, Node node)
        {
            while (reader.Read())
            {
                switch (reader.NodeType)
                {
                    case XmlNodeType.Element:
                        if (node.name.Length == 0)
                        {
                            // Fresh node (the root): this element describes the node itself.
                            node.name = reader.Name;
                            GetAttributes(reader, node);
                        }
                        else
                        {
                            // Must be captured while the reader is still on the element node,
                            // before the attributes are visited.
                            bool isEmpty = reader.IsEmptyElement;

                            Node newNode = new Node();
                            newNode.name = reader.Name;
                            if (node.children == null)
                            {
                                node.children = new List<Node>();
                            }
                            node.children.Add(newNode);
                            GetAttributes(reader, newNode);

                            // A self-closing element (<x/>) has no content and no EndElement node;
                            // recursing into it would swallow the following siblings as its children.
                            if (!isEmpty && !ReadNode(reader, newNode))
                            {
                                // The child ran out of input, so there is nothing left to read.
                                return false;
                            }
                        }
                        break;
                    case XmlNodeType.EndElement:
                        // End tag of the element this call is responsible for.
                        return true;
                    case XmlNodeType.Text:
                        node.text = reader.Value;
                        break;
                }
            }
            return false;
        }

        /// <summary>
        /// Copies the attributes of the element the reader is positioned on into
        /// <c>node.attributes</c>. The dictionary is only created when the element
        /// has at least one attribute; otherwise the field is left untouched.
        /// </summary>
        /// <param name="reader">Reader positioned on an element node.</param>
        /// <param name="node">Node that receives the attribute name/value pairs.</param>
        static void GetAttributes(XmlReader reader, Node node)
        {
            if (reader.AttributeCount == 0)
            {
                return;
            }

            node.attributes = new Dictionary<string, string>();
            while (reader.MoveToNextAttribute())
            {
                node.attributes.Add(reader.Name, reader.Value);
            }

            // Put the reader back on the owning element so callers see a consistent position.
            reader.MoveToElement();
        }
    }
    /// <summary>
    /// Simple mutable container for one XML element: its name, text,
    /// attributes and child nodes.
    /// </summary>
    public class Node
    {
        /// <summary>Child nodes; <c>null</c> until a list is assigned.</summary>
        public List<Node> children;

        /// <summary>Attribute name/value pairs; <c>null</c> until a dictionary is assigned.</summary>
        public Dictionary<string, string> attributes;

        /// <summary>Text content; defaults to the empty string.</summary>
        public string text = string.Empty;

        /// <summary>Element name; defaults to the empty string.</summary>
        public string name = string.Empty;
    }
}
​
相关问题