用于在Excel中使用正则表达式的通用UDF

时间:2013-10-20 18:28:39

标签: regex excel excel-vba vba

我需要每周解析和汇总几千条文本行。 Excel通配符不够灵活,我想删除粘贴到Notepad ++中进行处理或提供给脚本的额外步骤。

以下是我提出的工具。它们仍然有点慢 - 在公司的笔记本电脑上可能每秒3000线 - 但它们很方便。

RXMatch - 返回第一个匹配项,返回子组的选项。

=RXMatch("Apple","A(..)",1) -> "pp"

RXCount - 计算匹配数

=RXCount("Apple","p") -> 2

RXPrint - 将第一个匹配和/或子组嵌入到模板字符串中

=RXPrint("Apple","(\S)\S+","\1 is for \0") -> "A is for Apple"

RXPrintAll - 将每个匹配嵌入到模板字符串中,加入结果

=RXPrintAll("Apple Banana","(\S)\S+","\1 is for \0") -> "A is for Apple, B is for Banana"

RXMatches - 返回一个垂直的匹配数组,返回一个子组的选项

=RXMatches("Apple Banana","\S+") -> {"Apple";"Banana"}

2 个答案:

答案 0 :(得分:3)

RXMatch

Public Function RXMatch(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns the matching text
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Pattern = Pattern

    Set Matches = RE.Execute(Text)

    If (Matches.Count > 0) Then
        If (Group > 0) Then
            retval = Matches(0).submatches(Group - 1)
        Else
            retval = Matches(0)
        End If
    Else
        retval = ""
    End If

    RXMatch = retval
End Function

RXCount

Public Function RXCount(Text As String, Pattern As String, Optional IgnoreCase As Boolean = True) As Integer
    Dim retval As Integer
    ' Counts the number of matches
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Global = True

    RE.Pattern = Pattern
    Set Matches = RE.Execute(Text)

    retval = Matches.Count

    RXCount = retval
End Function

RXPrint

Public Function RXPrint(Text As String, Pattern As String, Optional Template As String = "\0", Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns a new string formatted according to the given template, using the first match found
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim REText, RETemplate As Object
    Dim MatchesText, MatchesTemplate As Object

    Set REText = CreateObject("vbscript.regexp")
    REText.IgnoreCase = IgnoreCase
    REText.Pattern = Pattern

    Set MatchesText = REText.Execute(Text)

    Set RETemplate = CreateObject("vbscript.regexp")
    RETemplate.Global = True
    RETemplate.Pattern = "(?:\\(.))|([^\\]+)"

    Set MatchesTemplate = RETemplate.Execute(Template)

    If (MatchesText.Count > 0) Then
        ReDim retArray(0 To MatchesTemplate.Count - 1) As String
        Dim escaped As String
        Dim plaintext As String
        For i = 0 To MatchesTemplate.Count - 1
            escaped = MatchesTemplate(i).submatches(0)
            plaintext = MatchesTemplate(i).submatches(1)
            If (Len(escaped) > 0) Then
                If (IsNumeric(escaped)) Then
                    Dim groupnum As Integer
                    groupnum = CInt(escaped)
                    If groupnum = 0 Then
                        retArray(i) = MatchesText(0)
                    ElseIf (groupnum > MatchesText(0).submatches.Count) Then
                        retArray(i) = "?"
                    Else
                        retArray(i) = MatchesText(0).submatches(groupnum - 1)
                    End If
                Else
                    retArray(i) = escaped
                End If
            Else
                retArray(i) = plaintext
            End If
        Next i
        retval = Join(retArray, "")
    Else
        retval = ""
    End If

    RXPrint = retval
End Function

RXPrintAll

Public Function RXPrintAll(Text As String, Pattern As String, Optional Template As String = "\0", Optional Delimiter As String = ", ", Optional IgnoreCase As Boolean = True) As String
    Dim retval As String
    ' Takes a string and returns a new string formatted according to the given template, repeated for each match
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Template (optional) is a string which should contain group identifiers (\0 - \9) to be substituted with groups in the match
    ' Delimiter (optional) specified how the results will be joined
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim REText, RETemplate As Object
    Dim MatchesText, MatchesTemplate As Object

    Set REText = CreateObject("vbscript.regexp")
    REText.IgnoreCase = IgnoreCase
    REText.Global = True
    REText.Pattern = Pattern

    Set MatchesText = REText.Execute(Text)

    Set RETemplate = CreateObject("vbscript.regexp")
    RETemplate.Global = True
    RETemplate.Pattern = "(?:\\(.))|([^\\]+)"

    Set MatchesTemplate = RETemplate.Execute(Template)

    If (MatchesText.Count > 0) Then
        ReDim retArrays(0 To MatchesText.Count - 1)
        For j = 0 To MatchesText.Count - 1
            ReDim retArray(0 To MatchesTemplate.Count - 1) As String
            Dim escaped As String
            Dim plaintext As String
            For i = 0 To MatchesTemplate.Count - 1
                escaped = MatchesTemplate(i).submatches(0)
                plaintext = MatchesTemplate(i).submatches(1)
                If (Len(escaped) > 0) Then
                    If (IsNumeric(escaped)) Then
                        Dim groupnum As Integer
                        groupnum = CInt(escaped)
                        If groupnum = 0 Then
                            retArray(i) = MatchesText(j)
                        ElseIf (groupnum > MatchesText(j).submatches.Count) Then
                            retArray(i) = "?"
                        Else
                            retArray(i) = MatchesText(j).submatches(groupnum - 1)
                        End If
                    Else
                        retArray(i) = escaped
                    End If
                Else
                    retArray(i) = plaintext
                End If
            Next i
            retArrays(j) = Join(retArray, "")
        Next j
        retval = Join(retArrays, Delimiter)
    Else
        retval = ""
    End If
    RXPrintAll = retval
End Function

RXMatches

Public Function RXMatches(Text As String, Pattern As String, Optional Group As Integer = 0, Optional IgnoreCase As Boolean = True) As Variant
    Dim retval() As String
    ' Takes a string and returns all matches in a vertical array
    ' Text is the string to be searched
    ' Pattern is the regex pattern
    ' Group (optional) selects a parenthesized group (count the number of left parentheses preceding it to get the group number)
    ' IgnoreCase (optional) set to False for a case-sensitive search

    Dim RE As Object
    Dim Matches As Object

    Set RE = CreateObject("vbscript.regexp")
    RE.IgnoreCase = IgnoreCase
    RE.Global = True
    RE.Pattern = Pattern

    Set Matches = RE.Execute(Text)

    If (Matches.Count > 0) Then
        ReDim retval(0 To Matches.Count - 1)
        For i = 0 To Matches.Count - 1
            If (Group > 0) Then
                retval(i) = Matches(i).submatches(Group - 1)
            Else
                retval(i) = Matches(i)
            End If
        Next i
    Else
        ReDim retval(1)
        retval(0) = ""
    End If

    RXMatches = Application.Transpose(retval)
End Function

答案 1 :(得分:0)

处理UDF时,缓存创建的对象至关重要。

例如:

Public Function RegexTest(ByVal vHaystack As Variant, ByVal sPattern As String, Optional ByVal sFlags As String = "") As Boolean
    'If haystack is an error then return false
    If IsError(vHaystack) Then Exit Function
    
    'Stringify haystack
    Dim sHaystack As String: sHaystack = vHaystack
    
    'Cache regular expressions, especially important for formulae
    Static lookup As Object
    If lookup Is Nothing Then Set lookup = CreateObject("Scripting.Dictionary")
    
    'If cached object doesn't exist, create it
    Dim sKey As String: sKey = sPattern & "-" & sFlags
    If Not lookup.exists(sKey) Then
        'Create regex object
        Set lookup(sKey) = CreateObject("VBScript.Regexp")
        
        'Bind flags
        For i = 1 To Len(sFlags)
            Select Case Mid(sFlags, i, 1)
                Case "i"
                    lookup(sKey).IgnoreCase = True
                Case "g"
                    lookup(sKey).Global = True
            End Select
        Next
        
        'Set pattern
        lookup(sKey).Pattern = sPattern
    End If
    
    'Use test function of regex object
    RegexTest = lookup(sKey).test(sHaystack)

End Function

将其应用到您自己的函数中,您将看到它极大地提高了在大量单元上的执行速度。