如何从 Word 中提取嵌入的文件

时间:2021-02-07 13:57:10

标签: vba ms-word docx

我编写了一个 Word 宏 (VBA),它从 Word 文档(docx 或 docm,而不是 doc)中提取以下 (Ole) 嵌入文件:

  • 文档、文档、文档
  • xls、xlsx、xlsm(xlsb 已复制但无效)
  • ppt、pptx
  • pdf
  • txt
  • exe
  • 压缩包,rar
  • mp3、wav
  • mp4、avi
  • html

如果您有任何建议,请告诉我。

您可以将此宏复制到 Word 模块中,例如“正常”并在菜单栏中创建一个链接。

Option Explicit
Sub ExtractFilesFromWord()
    Dim Home, Tmp, Word As String
    Dim sh, FSO As Object
    
    Home = ActiveDocument.Path & "\"
    If Home = "" Then
        MsgBox "No document open. Do nothing."
        Exit Sub
    ElseIf LCase$(Mid$(ActiveDocument.Name, Len(ActiveDocument.Name) - 3, 3)) <> "doc" Then
        MsgBox "Not a docx or a docm. Do nothing."
        Exit Sub
    End If
    Tmp = Home & "tmp-" & Format(Date, "YY-MM-DD") & "\"
    Word = Tmp & "word\"
    Set FSO = CreateObject("Scripting.FileSystemObject")
    If FSO.FolderExists(IIf(Right(Tmp, 1) = "\", Left(Tmp, Len(Tmp) - 1), Tmp)) Then FSO.DeleteFolder IIf(Right(Tmp, 1) = "\", Left(Tmp, Len(Tmp) - 1), Tmp)
    MkDir Tmp
    
    If Len(Dir(Tmp & ActiveDocument.Name & ".zip")) > 0 Then Kill Tmp & ActiveDocument.Name & ".zip"
    WordBasic.CopyFileA FileName:=ActiveDocument.FullName, Directory:=Tmp & ActiveDocument.Name & ".zip"
    Set sh = CreateObject("Shell.Application")
    sh.Namespace(IIf(Right(Tmp, 1) = "\", Left(Tmp, Len(Tmp) - 1), Tmp)).CopyHere sh.Namespace(Tmp & ActiveDocument.Name & ".zip").items
    Call ExtractFilesFromUnZip(Word, Tmp, Home)
    If FSO.FolderExists(IIf(Right(Tmp, 1) = "\", Left(Tmp, Len(Tmp) - 1), Tmp)) Then FSO.DeleteFolder IIf(Right(Tmp, 1) = "\", Left(Tmp, Len(Tmp) - 1), Tmp)
    
    Set FSO = Nothing
    Set sh = Nothing
    
    MsgBox "Files written to:" & vbCr & vbCr & Home, 64

End Sub
Sub ExtractFilesFromUnZip(Word, Tmp, Target)
    Dim XDoc, Node, Node1, Node2 As Object
    Dim OleHN, ShapeHN, OrgN As String
    Dim ridOle, ridShape As String

    Set XDoc = CreateObject("Microsoft.XMLDOM")
    XDoc.async = False
    XDoc.validateOnParse = False
    XDoc.Load (Word & "document.xml")
    For Each Node In XDoc.getElementsByTagName("*/w:object")
        OrgN = ""
        ridOle = ""
        ridShape = ""
        For Each Node1 In Node.ChildNodes
            If LCase(Node1.BaseName) = "oleobject" Then
                ridOle = Node1.Attributes.getNamedItem("r:id").Text
            ElseIf LCase(Node1.BaseName) = "shape" Then
                Set Node2 = Node1.SelectSingleNode("v:imagedata")
                If Not (Node2 Is Nothing) Then ridShape = Node2.Attributes.getNamedItem("r:id").Text
            End If
            If ridOle <> "" And ridShape <> "" Then
                Call ParseRels(Word, ridOle, ridShape, OleHN, ShapeHN)
                If ShapeHN <> "" Then
                    Call GetNameFromIcon(Word & ShapeHN, OrgN)
                    Select Case LCase$(Mid$("???" & OrgN, IIf(InStrRev(OrgN, ".") < 1, 1, InStrRev(OrgN, ".") + 4), 3))
                    Case "pdf"
                        Call ExtractFile(37, 80, 68, 70, 45, Word & OleHN, 999, 0, 0, 0, 0, 0, Target & OrgN, 0, 0)
                    Case "htm"
                        Call ExtractFile(-33, 60, 33, 256, 256, Word & OleHN, 256, 256, 0, 0, 0, 256, Target & OrgN, 1, -1)
                    Case "wav"
                        Call ExtractFile(0, 82, 73, 70, 70, Word & OleHN, 93, 0, 0, 0, 256, 0, Target & OrgN, 1, -2)
                    Case "txt"
                        Call ExtractFile(116, 0, 256, 256, 0, Word & OleHN, 10, 256, 0, 0, 0, 256, Target & OrgN, 6, -6)
                    Case "rar"
                        Call ExtractFile(82, 97, 114, 33, 26, Word & OleHN, 92, 0, 0, 0, 256, 0, Target & OrgN, 0, -1)
                    Case "exe"
                        Call ExtractFile(77, 90, 144, 0, 3, Word & OleHN, 97, 0, 0, 0, 256, 0, Target & OrgN, 0, -1)
                    Case "zip"
                        Call ExtractFile(80, 75, 3, 4, 256, Word & OleHN, 0, 0, 256, 40, 0, 0, Target & OrgN, 0, 7)
                    Case "mp4"
                        Call ExtractFile(0, 32, 102, 116, 121, Word & OleHN, 85, 0, 0, 0, 67, 0, Target & OrgN, -2, 1)
                    Case "avi"
                        Call ExtractFile(0, 82, 73, 70, 70, Word & OleHN, 0, 85, 0, 0, 0, 67, Target & OrgN, 1, -1)
                    Case "???"
                    Case Else
                        Call CopyWithDateTime(Word & OleHN, Target, OrgN)
                    End Select
                End If
                Exit For
            End If
        Next Node1
    Next Node
    Set XDoc = Nothing
    Set Node = Nothing
    Set Node1 = Nothing
    Set Node2 = Nothing

End Sub
Sub ExtractFile(ByVal A0, ByVal A1, ByVal A2, ByVal A3, ByVal A4, ByVal OleFN, ByVal Z0, ByVal Z1, ByVal Z2, ByVal Z3, ByVal Z4, ByVal Z5, ByVal TextFN, ByVal offset, ByVal length)
    Dim i, j, nFile As Long
    Dim L() As Byte
    Dim B() As Byte
    
    If Not FileOpen(OleFN, B) Then Exit Sub
    For i = 0 To UBound(B) - 64
        If IIf(A0 < 0, B(i) < A0 * -1, B(i) = A0) And IIf(A1 = 256, B(i + 1) > 0, B(i + 1) = A1) And IIf(A2 = 256, B(i + 2) > 0, B(i + 2) = A2) And IIf(A3 = 256, B(i + 3) > 0, B(i + 3) = A3) And IIf(A4 = 256, B(i + 4) > 0, B(i + 4) = A4) Then Exit For
    Next
    If Z0 < 257 Then
        For j = UBound(B) - 16 To i - 64 Step -1
            If IIf(Z0 = 256, B(j) > 0, B(j) = Z0) And IIf(Z1 = 256, B(j + 1) > 0, B(j + 1) = Z1) And IIf(Z2 = 256, B(j + 2) > 0, B(j + 2) = Z2) And B(j + 3) = Z3 And IIf(Z4 = 256, B(j + 4) > 0, B(j + 4) = Z4) And IIf(Z5 = 256, B(j + 5) > 0, B(j + 5) = Z5) Then Exit For
        Next
    Else
        j = UBound(B)
    End If
    ReDim L(0 To j - i + length)
    For j = 0 To IIf(UBound(L) + i + offset > UBound(B), UBound(L) + i - length, UBound(L))
        L(j) = B(i + j + offset)
    Next
    nFile = FreeFile
    Open TextFN For Binary Access Write As nFile
    Put nFile, , L
    Close nFile
    
End Sub
Sub ParseRels(Word, ridOle, ridShape, OleHN, ShapeHN)
    Dim RDoc, RNode As Object
    
    OleHN = ""
    ShapeHN = ""
    Set RDoc = CreateObject("Microsoft.XMLDOM")
    RDoc.async = False
    RDoc.validateOnParse = False
    RDoc.Load (Word & "_rels\document.xml.rels")
    For Each RNode In RDoc.getElementsByTagName("*/Relationship")
        Select Case RNode.Attributes.getNamedItem("Id").Text
        Case ridOle
            OleHN = Replace(RNode.Attributes.getNamedItem("Target").Text, "/", "\")
        Case ridShape
            ShapeHN = Replace(RNode.Attributes.getNamedItem("Target").Text, "/", "\")
        End Select
    Next
    Set RDoc = Nothing
    Set RNode = Nothing
    
End Sub
Sub GetNameFromIcon(ByVal ShapeFN, OrgN)
    Dim Fstart, Fstopp As Long
    Dim S As String
    Dim B() As Byte
    
    If Dir(ShapeFN) = vbNullString Then Exit Sub
    If Not FileOpen(ShapeFN, B) Then Exit Sub
    S = B
    If InStrRev(S, "IconOnly") > 0 Then
        Fstopp = InStrRev(S, Chr(0) & Chr(70) & Chr(0) & Chr(16) & Chr(0) & Chr(2) & Chr(0)) - 1
    Else
        Fstopp = InStrRev(S, Chr(9) & Chr(0) & Chr(9) & Chr(0)) - 1
    End If
    Fstart = InStrRev(S, Chr(0), Fstopp - 1)
    OrgN = TrimStr(Mid$(S, Fstart, Fstopp - Fstart + 1))
    
End Sub
Function FileOpen(FN, B() As Byte) As Boolean
    Dim nFile As Integer
    
    FileOpen = False
    nFile = FreeFile
    Open FN For Binary Access Read As nFile
    If LOF(nFile) > 0 Then
        ReDim B(LOF(nFile) - 1)
        Get nFile, , B
        FileOpen = True
    End If
    Close nFile
    
End Function
Function TrimStr(OrgN)
    Dim j, j0 As Long
    
    For j = Len(OrgN) To 1 Step -1
        If Asc(Mid$(OrgN, j, 1)) > 32 Then Exit For
    Next
    OrgN = Mid$(OrgN, 1, j)
    For j = 1 To Len(OrgN)
        If Asc(Mid$(OrgN, j, 1)) > 32 Then Exit For
    Next
    TrimStr = Mid$(OrgN, j, Len(OrgN) - j + 1)
    
End Function
Sub CopyWithDateTime(Source, Target, Name)
    Dim oFile, FSO As Object
    
    Set FSO = CreateObject("Scripting.FileSystemObject")
    Call FSO.CopyFile(Source, Target & Name, True)
    Set oFile = CreateObject("Shell.Application").Namespace(Target).ParseName(Name)
    oFile.ModifyDate = FormatDateTime(Date, 2) & " " & FormatDateTime(Time, 3)
    Set oFile = Nothing
    Set FSO = Nothing
    
End Sub

0 个答案:

没有答案
相关问题