VBA使用XMLDocument解析XML

时间:2019-01-12 07:06:46

标签: excel xml vba

请帮助。 我想阅读以下XML文档:

<DATA>
    <LEVEL_1>
        <col_1>ALevel1_col1</col_1>
        <col_2>ALevel1_col2</col_2>
        <LEVEL_2>
            <col_1>BLevel2_col1</col_1>
            <col_2>BLevel2_col2</col_2>
            <LEVEL_3>
                <col_1>CLevel3_col1</col_1>
                <col_2>CLevel3_col2</col_2>
            </LEVEL_3>
        </LEVEL_2>
        <LEVEL_2>
            <col_1>B_Level2_col1</col_1>
            <col_2>B_Level2_col2</col_2>
        </LEVEL_2>
    </LEVEL_1>
    <LEVEL_1>
        <col_1>XLevel1_col1</col_1>
        <col_2>XLevel1_col2</col_2>
        <LEVEL_2>
            <col_1>YLevel2_col1</col_1>
            <col_2>YLevel2_col2</col_2>
            <LEVEL_3>
                <col_1>ZLevel3_col1</col_1>
                <col_2>ZLevel3_col2</col_2>
            </LEVEL_3>
        </LEVEL_2>
        <LEVEL_2>
            <col_1>Y_Level2_col1</col_1>
            <col_2>Y_Level2_col2</col_2>
        </LEVEL_2>
    </LEVEL_1>
</DATA>

我想要这样的输出:

ALevel1_col1
ALevel1_col2
    BLevel2_col1
    BLevel2_col2
        CLevel3_col1
        CLevel3_col2
    B_Level2_col1
    B_Level2_col2

XLevel1_col1
XLevel1_col2
    YLevel2_col1
    YLevel2_col2
        ZLevel3_col1
        ZLevel3_col2
    Y_Level2_col1
    Y_Level2_col2

到目前为止,我所获得的是这样的:

Sub test()
    'On Error Resume Next
    Dim sXml
    sXml = "<DATA>" & _
        "   <LEVEL_1>" & _
        "       <col_1>ALevel1_col1</col_1>" & _
        "       <col_2>ALevel1_col2</col_2>" & _
        "       <LEVEL_2>" & _
        "           <col_1>BLevel2_col1</col_1>" & _
        "           <col_2>BLevel2_col2</col_2>" & _
        "           <LEVEL_3>" & _
        "               <col_1>CLevel3_col1</col_1>" & _
        "               <col_2>CLevel3_col2</col_2>" & _
        "           </LEVEL_3>" & _
        "       </LEVEL_2>" & _
        "       <LEVEL_2>" & _
        "           <col_1>B_Level2_col1</col_1>" & _
        "           <col_2>B_Level2_col2</col_2>" & _
        "       </LEVEL_2>" & _
        "   </LEVEL_1>"

    sXml = sXml & "   <LEVEL_1>" & _
        "       <col_1>XLevel1_col1</col_1>" & _
        "       <col_2>XLevel1_col2</col_2>" & _
        "       <LEVEL_2>" & _
        "           <col_1>YLevel2_col1</col_1>" & _
        "           <col_2>YLevel2_col2</col_2>" & _
        "           <LEVEL_3>" & _
        "               <col_1>ZLevel3_col1</col_1>" & _
        "               <col_2>ZLevel3_col2</col_2>" & _
        "           </LEVEL_3>" & _
        "       </LEVEL_2>" & _
        "       <LEVEL_2>" & _
        "           <col_1>Y_Level2_col1</col_1>" & _
        "           <col_2>Y_Level2_col2</col_2>" & _
        "       </LEVEL_2>" & _
        "   </LEVEL_1>" & _
        "</DATA>"

    Set XmlDoc = CreateObject("MSXML2.DOMDocument")
    XmlDoc.async = False
    XmlDoc.LoadXML (sXml)
    'Debug.Print XmlDoc.XML

    For Each LEVEL_1 In XmlDoc.SelectNodes("//DATA/LEVEL_1")
        'Debug.Print LEVEL_1.XML
        Debug.Print LEVEL_1.SelectSingleNode("col_1").Text
        Debug.Print LEVEL_1.SelectSingleNode("col_2").Text

        For Each LEVEL_2 In LEVEL_1.SelectNodes("//LEVEL_2")
            'Debug.Print LEVEL_2.XML
            Debug.Print vbTab & LEVEL_2.SelectSingleNode("col_1").Text
            Debug.Print vbTab & LEVEL_2.SelectSingleNode("col_2").Text

            For Each LEVEL_3 In LEVEL_2.SelectNodes("//LEVEL_3")
                'Debug.Print LEVEL_3.XML
                Debug.Print vbTab & LEVEL_3.SelectSingleNode("col_1").Text
                Debug.Print vbTab & LEVEL_3.SelectSingleNode("col_2").Text
            Next
        Next
    Next
End Sub

输出:

ALevel1_col1
ALevel1_col2
    BLevel2_col1
    BLevel2_col2
    CLevel3_col1
    CLevel3_col2
    ZLevel3_col1
    ZLevel3_col2
    B_Level2_col1
    B_Level2_col2
    CLevel3_col1
    CLevel3_col2
    ZLevel3_col1
    ZLevel3_col2
    YLevel2_col1
    YLevel2_col2
    CLevel3_col1
    CLevel3_col2
    ZLevel3_col1
    ZLevel3_col2
    Y_Level2_col1
    Y_Level2_col2
    CLevel3_col1
    CLevel3_col2
    ZLevel3_col1
    ZLevel3_col2
XLevel1_col1
XLevel1_col2
    BLevel2_col1
    BLevel2_col2
    CLevel3_col1
    CLevel3_col2
    ZLevel3_col1
    ZLevel3_col2
    B_Level2_col1
    B_Level2_col2
    CLevel3_col1
    CLevel3_col2
    ZLevel3_col1
    ZLevel3_col2
    YLevel2_col1
    YLevel2_col2
    CLevel3_col1
    CLevel3_col2
    ZLevel3_col1
    ZLevel3_col2
    Y_Level2_col1
    Y_Level2_col2
    CLevel3_col1
    CLevel3_col2
    ZLevel3_col1
    ZLevel3_col2

有人可以帮忙吗?

谢谢。

2 个答案:

答案 0 :(得分:0)

Yupz ... 找到了解决方案。

将每个节点视为一个新的xml文档。

Sub test()
    'On Error Resume Next

    Dim sXml
    sXml = "<DATA>" & _
        "   <LEVEL_1>" & _
        "       <col_1>ALevel1_col1</col_1>" & _
        "       <col_2>ALevel1_col2</col_2>" & _
        "       <LEVEL_2>" & _
        "           <col_1>BLevel2_col1</col_1>" & _
        "           <col_2>BLevel2_col2</col_2>" & _
        "           <LEVEL_3>" & _
        "               <col_1>CLevel3_col1</col_1>" & _
        "               <col_2>CLevel3_col2</col_2>" & _
        "           </LEVEL_3>" & _
        "       </LEVEL_2>" & _
        "       <LEVEL_2>" & _
        "           <col_1>B_Level2_col1</col_1>" & _
        "           <col_2>B_Level2_col2</col_2>" & _
        "       </LEVEL_2>" & _
        "   </LEVEL_1>"

    sXml = sXml & "   <LEVEL_1>" & _
        "       <col_1>XLevel1_col1</col_1>" & _
        "       <col_2>XLevel1_col2</col_2>" & _
        "       <LEVEL_2>" & _
        "           <col_1>YLevel2_col1</col_1>" & _
        "           <col_2>YLevel2_col2</col_2>" & _
        "           <LEVEL_3>" & _
        "               <col_1>ZLevel3_col1</col_1>" & _
        "               <col_2>ZLevel3_col2</col_2>" & _
        "           </LEVEL_3>" & _
        "       </LEVEL_2>" & _
        "       <LEVEL_2>" & _
        "           <col_1>Y_Level2_col1</col_1>" & _
        "           <col_2>Y_Level2_col2</col_2>" & _
        "       </LEVEL_2>" & _
        "   </LEVEL_1>" & _
        "</DATA>"

    Set XmlDoc = CreateObject("MSXML2.DOMDocument")
    XmlDoc.async = False
    XmlDoc.LoadXML (sXml)
    'Debug.Print XmlDoc.XML

    For Each LEVEL_1 In XmlDoc.SelectNodes("//DATA/LEVEL_1")
        'Debug.Print LEVEL_1.XML
        Debug.Print LEVEL_1.SelectSingleNode("col_1").Text
        Debug.Print LEVEL_1.SelectSingleNode("col_2").Text

        If LEVEL_1.HasChildNodes() Then
            Set a = CreateObject("MSXML2.DOMDocument")
            a.LoadXML (LEVEL_1.XML)

            For Each LEVEL_2 In a.SelectNodes("//LEVEL_2")
                'Debug.Print LEVEL_2.XML
                Debug.Print vbTab & LEVEL_2.SelectSingleNode("col_1").Text
                Debug.Print vbTab & LEVEL_2.SelectSingleNode("col_2").Text

                 If LEVEL_2.HasChildNodes() Then
                    Set b = CreateObject("MSXML2.DOMDocument")
                    b.LoadXML (LEVEL_2.XML)

                    For Each LEVEL_3 In b.SelectNodes("//LEVEL_3")
                        'Debug.Print LEVEL_3.XML
                        Debug.Print vbTab & vbTab & LEVEL_3.SelectSingleNode("col_1").Text
                        Debug.Print vbTab & vbTab & LEVEL_3.SelectSingleNode("col_2").Text
                    Next
                End If
            Next
        End If
    Next

End Sub

输出:

ALevel1_col1
ALevel1_col2
    BLevel2_col1
    BLevel2_col2
        CLevel3_col1
        CLevel3_col2
    B_Level2_col1
    B_Level2_col2
XLevel1_col1
XLevel1_col2
    YLevel2_col1
    YLevel2_col2
        ZLevel3_col1
        ZLevel3_col2
    Y_Level2_col1
    Y_Level2_col2

不太好,但是符合预期。

可能对其他人有用。

谢谢

答案 1 :(得分:0)

递归调用以分析XML结构

您在这里找到了一种通用方法,可以将您的 complete XML结构分析,并将结果写入2维数组v。我建议使用当前的MSXML 6.0版以及递归调用,以便更深入地了解 any 节点结构。

呼叫代码示例

Sub DisplayXML()
' Purpose: write structured node information to array
'          and display results in [2a] immediate window and/or [2b] worksheet of your choice
' Author:  T.M.
Dim sXML$, i&, ii&
Dim XMLFile As Object
'Set XMLFile = CreateObject("Microsoft.XMLDOM")             ' last stable version 3.0
Set XMLFile = CreateObject("MSXML2.DOMDocument.6.0")        ' <~~ recommended version 6.0
XMLFile.Async = False
XMLFile.ValidateOnParse = False
sXML = GetXMLContentString()                                ' get XML content as string value

If XMLFile.LoadXML(sXML) Then                               ' check correct loading
  'Debug.Print XMLFile.XML
' [1] write xml info to array with exact or assumed items count
  Dim v As Variant: ReDim v(1 To XMLFile.SelectNodes("//*").Length, 1 To 2)
  listChildNodes XMLFile.DocumentElement, v                 ' call helper function
' [2a] write results to immediate window                    ' change to your sheet name
  For i = LBound(v) To UBound(v)
      If Len(v(i, 2)) > 0 Then Debug.Print v(i, 2)
  Next i
' [2b] write results to sheet "Dump" showing complete structure
  With ThisWorkbook.Worksheets("Dump")                      ' <~~ change to any wanted sheet name
       .Range("A:B") = ""                                   ' clear result range
       .Range("A1:B1") = Array("XML Tag", "Node Value")     ' titles
       .Range("A2").Resize(UBound(v), UBound(v, 2)) = v     ' get  2-dim info array
  End With
Else
       MsgBox "Load Error " 
End If
Set XMLFile = Nothing
End Sub

Function GetXMLContentString() As String
' Purpose: return specific XML content string (to be loaded as string)
Dim sXML$                           ' data type string
sXML = "<DATA>" & _
    "   <LEVEL_1>" & _
    "       <col_1>ALevel1_col1</col_1>" & _
    "       <col_2>ALevel1_col2</col_2>" & _
    "       <LEVEL_2>" & _
    "           <col_1>BLevel2_col1</col_1>" & _
    "           <col_2>BLevel2_col2</col_2>" & _
    "           <LEVEL_3>" & _
    "               <col_1>CLevel3_col1</col_1>" & _
    "               <col_2>CLevel3_col2</col_2>" & _
    "           </LEVEL_3>" & _
    "       </LEVEL_2>" & _
    "       <LEVEL_2>" & _
    "           <col_1>B_Level2_col1</col_1>" & _
    "           <col_2>B_Level2_col2</col_2>" & _
    "       </LEVEL_2>" & _
    "   </LEVEL_1>"

sXML = sXML & "   <LEVEL_1>" & _
    "       <col_1>XLevel1_col1</col_1>" & _
    "       <col_2>XLevel1_col2</col_2>" & _
    "       <LEVEL_2>" & _
    "           <col_1>YLevel2_col1</col_1>" & _
    "           <col_2>YLevel2_col2</col_2>" & _
    "           <LEVEL_3>" & _
    "               <col_1>ZLevel3_col1</col_1>" & _
    "               <col_2>ZLevel3_col2</col_2>" & _
    "           </LEVEL_3>" & _
    "       </LEVEL_2>" & _
    "       <LEVEL_2>" & _
    "           <col_1>Y_Level2_col1</col_1>" & _
    "           <col_2>Y_Level2_col2</col_2>" & _
    "       </LEVEL_2>" & _
    "   </LEVEL_1>" & _
    "</DATA>"
GetXMLContentString = sXML                      ' return
End Function

辅助功能

Function listChildNodes(oCurrNode As Object, _
                        ByRef v As Variant, _
                        Optional ByRef i As Long = 1, _
                        Optional iLvl As Integer = 0 _
                        ) As Boolean
' Purpose: assign the complete node structure with contents to a 1-based 2-dim array
' Author:  T.M. (https://stackoverflow.com/users/6460297/t-m)
' Note: Late binding XML doesn't allow the use of IXMLDOMNodeType enumeration constants
'       (1 ... NODE_ELEMENT, 2 ... NODE_ATTRIBUTE, 3 ... NODE_TEXT etc.)
' Escape
  If oCurrNode Is Nothing Then Exit Function
  If i < 1 Then i = 1                                       ' one based items Counter
' Automatic increase of array size if needed
  If i >= UBound(v) Then                                    ' change array size if needed
     Dim tmp As Variant
     tmp = Application.Transpose(v)                         ' change rows to columns
     ReDim Preserve tmp(1 To 2, 1 To UBound(v) + 1000)      ' increase row numbers
     v = Application.Transpose(tmp)                         ' transpose back
     Erase tmp
  End If
  Const NAMEColumn& = 1, VALUEColumn& = 2                   ' constants for column 1 and 2
' Declare variables
  Dim oChildNode As Object                                  ' late bound node object
  Dim bDisplay   As Boolean
' ---------------------------------------------------------------------
' A. It's nothing but a TextNode (i.e. a parent node's firstChild!)
' ---------------------------------------------------------------------
If (oCurrNode.NodeType = 3) Then                                 ' 3 ... NODE_TEXT
  ' display pure text content (NODE_TEXT) of parent elements
    v(i, VALUEColumn) = String((iLvl - 1) * 2, " ") & " " & oCurrNode.Text ' nodeValue of text node
    ' return
    listChildNodes = True
ElseIf oCurrNode.NodeType = 1 Then                                ' 1 ... NODE_ELEMENT
   ' --------------------------------------------------------------
   ' B.1 NODE_ELEMENT WITHOUT text node immediately below,
   '     a) e.g. <LEVEL_1> followed by node element <col_1>,
   '        (i.e. FirstChild.NodeType must not be of type NODE_TEXT = 3)
   '     b) node element without any child node (e.g. last <col_2> child node in last LEVEL_2 element)
   '     Note: a text content (NODE_TEXT) actually is a child node(!) to an element node
   '           (see section A. getting the FirstChild of a NODE_ELEMENT)
   ' --------------------------------------------------------------
   ' a) display parent elements of other element nodes
     If oCurrNode.HasChildNodes Then
         If Not oCurrNode.FirstChild.NodeType = 3 Then             ' <>3 ... not a NODE_TEXT
            bDisplay = True
         End If
   ' b) always display empty node elements
     Else                                                           ' empty NODE_ELEMENT
            bDisplay = True
     End If
     If bDisplay Then
            v(i, NAMEColumn) = String(iLvl * 2, " ") & _
                               oCurrNode.nodename & getAtts(oCurrNode)
            i = i + 1
     End If
   ' --------------------------------------------------------------
   ' B.2 check child nodes
   ' --------------------------------------------------------------
     For Each oChildNode In oCurrNode.ChildNodes
      ' ~~~~~~~~~~~~~~~~~
      ' recursive call <<
      ' ~~~~~~~~~~~~~~~~~
        bDisplay = listChildNodes(oChildNode, v, i, iLvl + 1)

        If bDisplay Then
            v(i, NAMEColumn) = String(iLvl * 2, " ") & _
                               oCurrNode.nodename & getAtts(oCurrNode)
            i = i + 1
        End If
     Next oChildNode
   ' return
     listChildNodes = False
Else    ' just to demonstrate the use of other xml types as e.g. <!-- comments -->
     If oCurrNode.NodeType = 8 Then   ' 8 ... NODE_COMMENT
        v(i, VALUEColumn) = "<!-- " & oCurrNode.NodeValue & "-->"
        i = i + 1
     End If
   ' return
     listChildNodes = False
End If

End Function

'Helper function getAtts()
Function getAtts(ByRef node As Object) As String
' Purpose: return attribute(s) string in brackets, e.g. '[@num="123"]'
' Note:    called by above function listChildNodes()
'          not needed in OP, just in case there exist attribute names
' Author:  T.M. (https://stackoverflow.com/users/6460297/t-m)
  Dim sAtts$, ii&
  If node.Attributes.Length > 0 Then
      ii = 0: sAtts = ""
      For ii = 0 To node.Attributes.Length - 1
        sAtts = sAtts & "[@" & node.Attributes.Item(ii).nodename & "=""" & node.Attributes.Item(ii).NodeValue & """]"
      Next ii
  End If
' return
  getAtts = sAtts
End Function