解析XML的最快方法

时间:2020-05-11 22:00:57

标签: xml vba ms-access xml-parsing

我有一些巨大的XML文件,每个文件大约1GB。它们太大了,即使在Notepad++中也无法打开。

我编辑了XML,并且已经能够通过DOMDocument60对其进行解析(感谢Stack Overflow上提供的帮助)。

我阅读了类似的问题“Improve speed of VBA”,但仍然无法正确地实现其中的做法,因此需要一些指导。

例如:

  1. 如何在SAX中加载xml?在使用SAX读取之前,是否需要将其加载到DOMDocument60中?
  2. 导入后如何在SAX中逐行读取?在DOMDocument60中,我可以轻松跳转到任何节点,然后遍历子节点,但不确定如何在SAX中执行此操作?
  3. 与DOMDocument相比,SAX解析巨大的XML有什么时间差异?我还没有找到任何实时示例。
  4. 除了SAX之外,VBA中是否还有其他更好的库可以用来加快解析速度?

感谢您的建议。 (示例XML文件在下面)

<ParentNode type="actual">

<SampleObject class="POC" version="XYZ123" distName="Test1" id="Sample">
  <p name="name">POC1</p>
  <p name="object1">0</p>
  <p name="object2">6</p>
  <p name="object3">0</p>
</SampleObject>

<SampleObject class="POC" version="XYZ123" distName="Test2" id="Sample">
  <p name="name">POC1</p>
  <p name="object1">2</p>
  <p name="object2">10</p>
  <p name="object4">4</p>
  <p name="object3">6</p>
</SampleObject>

<SampleObject class="POC" version="XYZ123" distName="Test3" id="Sample">
  <p name="name">POC1</p>
  <p name="object2">90</p>
  <p name="object3">0</p>
</SampleObject>

<SampleObject class="POC" version="XYZ123" distName="Test4" id="Sample">
  <p name="name">POC1</p>
  <p name="object1">2</p>
  <p name="object2">10</p>
  <p name="object4">40</p>
  <p name="object3">61</p>
</SampleObject>

</ParentNode>

2 个答案:

答案 0 :(得分:0)

下面是我按照上面发布的链接、并使用您的示例XML所能做到的。这里只是把结果输出到立即窗口,因为我不知道您打算如何处理提取出来的数据……

常规模块中的测试方法:

' Parses the sample XML file with the MSXML SAX reader, routing every
' parse event to a ContentHandler instance.
Sub Tester()

    Const FNAME As String = "example.xml"
    Dim saxReader As MSXML2.SAXXMLReader30
    Dim handler As ContentHandler
    Dim xmlPath As String

    Set saxReader = New MSXML2.SAXXMLReader30
    Set handler = New ContentHandler

    'test xml file is in same folder as the workbook
    xmlPath = ThisWorkbook.Path & "\" & FNAME

    Set saxReader.ContentHandler = handler
    saxReader.parseURL xmlPath

End Sub

类模块ContentHandler

Option Explicit

' SAX content handler that reports <SampleObject> elements and their <p>
' children to the Immediate window as the reader streams through the file.
Implements IVBSAXContentHandler

' Attributes of the <SampleObject> currently being parsed.
' Declared As String (not implicit Variant) so they can be passed directly
' to procedures that take ByRef String parameters.
Private cls As String, vers As String, distName As String, id As String

' Name attribute and accumulated text of the <p> currently being parsed.
Private pName As String, pContent As String

' Flags telling which element the reader is currently inside.
Private inSO As Boolean, inP As Boolean

' Collects text that appears inside a <p> element.
' The reader may deliver one element's text in several calls, so the
' text is buffered here and reported once, when the element ends.
Private Sub IVBSAXContentHandler_characters(strChars As String)
    If inP Then pContent = pContent & strChars
End Sub

' Captures the attributes of <SampleObject> and <p> start tags and prints them.
Private Sub IVBSAXContentHandler_startElement(strNamespaceURI As String, _
                             strLocalName As String, strQName As String, _
                             ByVal oAttributes As MSXML2.IVBSAXAttributes)
    Select Case strLocalName
        Case "SampleObject"
            inSO = True
            cls = oAttributes.getValueFromName("", "class")
            vers = oAttributes.getValueFromName("", "version")
            distName = oAttributes.getValueFromName("", "distName")
            id = oAttributes.getValueFromName("", "id")
            Debug.Print "Start", strLocalName, cls, vers, distName, id
        Case "p"
            inP = True
            pContent = ""   ' start a fresh text buffer for this <p>
            pName = oAttributes.getValueFromName("", "name")
            Debug.Print "Start", strLocalName, pName
    End Select
End Sub

' Reports the buffered <p> text (if any) and clears the per-element state.
Private Sub IVBSAXContentHandler_endElement(strNamespaceURI As String, strLocalName As String, strQName As String)
    Select Case strLocalName
        Case "SampleObject"
            inSO = False
            cls = ""
            vers = ""
            distName = ""
            id = ""
        Case "p"
            ' Only print when text was actually received, so an empty <p>
            ' produces no "P content:" line.
            If Len(pContent) > 0 Then Debug.Print "P content:", pContent
            pName = ""
            pContent = ""
            inP = False
    End Select
End Sub

' The remaining interface members must exist because of Implements,
' but this handler does not need to act on them.

Private Property Set IVBSAXContentHandler_documentLocator( _
                            ByVal RHS As MSXML2.IVBSAXLocator)
End Property

Private Sub IVBSAXContentHandler_startDocument()
End Sub

Private Sub IVBSAXContentHandler_endDocument()
End Sub

Private Sub IVBSAXContentHandler_endPrefixMapping(strPrefix As String)
End Sub

Private Sub IVBSAXContentHandler_ignorableWhitespace(strChars As String)
End Sub

Private Sub IVBSAXContentHandler_processingInstruction(strTarget As String, strData As String)
End Sub

Private Sub IVBSAXContentHandler_skippedEntity(strName As String)
End Sub

Private Sub IVBSAXContentHandler_startPrefixMapping(strPrefix As String, strURI As String)
End Sub

答案 1 :(得分:0)

我尝试在“IVBSAXContentHandler_startElement”中(在每个 Case 分支里)调用一个新的 Sub,但它会产生错误。

' Builds and executes an INSERT statement for one parsed object.
'
' Parameters:
'   strLocalName, vers, id - accepted for the caller's convenience; not used here.
'   cls      - name of the target table (wrapped in [ ] in the SQL).
'   distName - "/"-separated name; the whole string goes to colvalues(0) and
'              its parts at index 1..3 go to colvalues(1..3).
'
' All parameters are ByVal so the caller may pass Variant variables as well
' as Strings (ByRef String parameters reject Variants at compile time).
'
' NOTE(review): colnames, colvalues, strSql and db are not declared in this
' procedure; they are assumed to be module-level variables defined elsewhere,
' with colvalues holding at least indexes 0..3 - confirm against the module.
Public Sub Ins_2G1(ByVal strLocalName As String, ByVal cls As String, ByVal vers As String, ByVal distName As String, ByVal id As String)
    Dim DNameArr() As String
    Dim insertcol As String
    Dim insertval As String
    Dim i As Long

    DNameArr = Split(distName, "/")
    colvalues(0) = distName
    ' distName may contain fewer than three "/" separators (or be empty);
    ' use an empty string for missing parts instead of failing with
    ' "Subscript out of range".
    For i = 1 To 3
        If i <= UBound(DNameArr) Then
            colvalues(i) = DNameArr(i)
        Else
            colvalues(i) = ""
        End If
    Next i

    'Converting Generated Parameter Name Array in to String
    insertcol = ""
    For i = LBound(colnames) To UBound(colnames)
        If i > LBound(colnames) Then insertcol = insertcol & ","
        insertcol = insertcol & CStr(colnames(i))
    Next i

    'Converting Generated Value Array in to String
    ' Iterate only over the array's real bounds, and double any embedded
    ' single quote so a value cannot terminate the SQL string literal.
    insertval = ""
    For i = LBound(colvalues) To UBound(colvalues)
        If i > LBound(colvalues) Then insertval = insertval & ","
        insertval = insertval & "'" & Replace(CStr(colvalues(i)), "'", "''") & "'"
    Next i

    'Inserting Record in to POC table
    strSql = "INSERT INTO [" & cls & "] (" & insertcol & ") VALUES (" & insertval & ");"
    db.Execute strSql

End Sub
相关问题