从导出到excel的扫描pdf文件中提取数据

时间:2017-09-11 18:47:24

标签: excel vba excel-vba

以下是我正在使用的Excel工作簿的数据结构的一些屏幕上限:

enter image description here

enter image description here

enter image description here

enter image description here

enter image description here

enter image description here

好的,我已经完成并根据每个人的说法编辑了代码。它仍然需要做很多工作。

我现在所困扰的是错误处理。显然,如果找不到关键字之一 - LastFirstMiddleRank,则会给我一个错误。

如果关键字后面没有值(字),那么我最终要做的是输出空白,如果有关键字,则输出值。如果缺少关键字,我想输出一个空白。值字也可以在关键字下面的行中。我想在这种情况下输出该值。

我现在正尝试使用If - Else语句执行此操作。但是,我认为它们可能写错了,因为如果找不到关键字,我会收到错误。

Option Explicit

Sub find2()

Dim lrd As Long
Dim lrdWS1 As Long
Dim iRow As Integer
Dim celltosplit As String
Dim result As String

'--------------------------------------------------------------------------------------------------------------------------------------

        lrdWS1 = Sheets("Table 1").Cells(Sheets("Table 1").Rows.count, 1).End(xlUp)(2).Row

        Sheets.Add(After:=Sheets(Sheets.count)).name = "FieldValues"

        lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(1).Row

        Worksheets("Table 1").Activate

'--------------------------------------------------------------------------------------------------------------------------------------

Do While Worksheets("Table 1").Activate And Cells.find(What:="Last", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
        :=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
        False, SearchFormat:=False).Activate

Application.Goto (Cells(1, 1))


'--------------------------------------------------------------------------------------------------------------------------------------

    Worksheets("Table 1").Activate
        Application.Goto (Cells(1, 1))

        If Cells.find(What:="Last", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
                    :=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
                    False, SearchFormat:=False).Activate Then

                    Selection.Copy
                    Sheets("FieldValues").Activate
                    Range("A" & lrd).Activate
                    ActiveSheet.Paste
                    Columns("A:A").EntireColumn.AUTOFIT


                    Cells.Replace What:="Last", Replacement:="", LookAt:=xlPart, SearchOrder _
                        :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False

                    lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row

                   Worksheets("Table 1").Activate
                        ActiveCell.UnMerge
                        Selection.Replace What:="Last", Replacement:="", LookAt:=xlPart, SearchOrder _
                                    :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False


         Else
                Cells(1, lrd) = ""
                lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row

         End If




'-------------------------------------------------------------------------------------------------------------------------------------

        Worksheets("Table 1").Activate
        Application.Goto (Cells(1, 1))


        If Cells.find(What:="First", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
                    :=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
                    False, SearchFormat:=False).Activate Then

                         Selection.Copy
                         Sheets("FieldValues").Activate
                         Range("A" & lrd).Activate
                         ActiveSheet.Paste
                         Columns("A:A").EntireColumn.AUTOFIT


                         Cells.Replace What:="First", Replacement:="", LookAt:=xlPart, SearchOrder _
                           :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False


                         lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row


                       Worksheets("Table 1").Activate
                            ActiveCell.UnMerge
                            Selection.Replace What:="First", Replacement:="", LookAt:=xlPart, SearchOrder _
                            :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False


        Else
            Cells("1", lrd) = ""
            lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row

        End If


 '-------------------------------------------------------------------------------------------------------------------------------------

        Worksheets("Table 1").Activate
        Application.Goto (Cells(1, 1))

        If Cells.find(What:="Middle", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
                    :=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
                    False, SearchFormat:=False).Activate Then

                        Selection.Copy
                        Sheets("FieldValues").Activate
                        Range("A" & lrd).Activate
                        ActiveSheet.Paste
                        Columns("A:A").EntireColumn.AUTOFIT


                        Cells.Replace What:="Middle", Replacement:="", LookAt:=xlPart, SearchOrder _
                         :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False


                        lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row


                        Worksheets("Table 1").Activate
                            ActiveCell.UnMerge
                            Selection.Replace What:="Middle", Replacement:="", LookAt:=xlPart, SearchOrder _
                            :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False


        Else
            Cells("A", lrd) = ""
            lrd = Sheets("FieldValues").Cells(Sheets("FieldValues").Rows.count, 1).End(xlUp)(2).Row

        End If



'-----------------------------------------------------------------------------------------------------------------------------------------------------------------

        Worksheets("Table 1").Activate
        Application.Goto (Cells(1, 1))


        If Cells.find(What:="Rank", After:=ActiveCell, LookIn:=xlFormulas, LookAt _
                    :=xlPart, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:= _
                    False, SearchFormat:=False).Activate Then

                        Selection.Copy
                        Sheets("FieldValues").Activate
                        Range("A" & lrd).Activate
                        ActiveSheet.Paste
                        Columns("A:A").EntireColumn.AUTOFIT


                        Cells.Replace What:="Rank", Replacement:="", LookAt:=xlPart, SearchOrder _
                        :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False

                        lrd = ActiveCell.Row + 2

                        Worksheets("Table 1").Activate

                            ActiveCell.UnMerge
                            Selection.Replace What:="Rank", Replacement:="", LookAt:=xlPart, SearchOrder _
                                    :=xlByRows, MatchCase:=False, SearchFormat:=False, ReplaceFormat:=False

          Else
            Cells("A", lrd) = ""
            lrd = ActiveCell.Row + 2

          End If

Loop

1 个答案:

答案 0 :(得分:1)

我很抱歉,但我必须说出来:你的代码完全混乱!每个人在评论中说的一切都适用。还有更多。

另外,你说:

  

在代码中我有一个循环设置运行,它第一次运行精细

不。不对。尝试为FirstMiddleRank字段值使用多个单词,看看你得到了什么!

您发布的特定问题是因为将字段值复制到FieldValues表后,而不是仅从找到的字段中删除字段名称,而是从中删除该字段名称 Table 1表格中的单元格!您使用的是Cells.Replace而不是Selection.Replace

但是,使用Replace()函数代替<Range>.Replace方法,你会远远更远,例如:

Selection.value = replace(Selection.value2,"Last","")

请注意,我在 没有办法 提倡使用Selection。正确的方法是使用范围对象变量,例如rngFoundField,并像这样使用它:

rngFoundField.value = replace(rngFoundField.value2,"Last","")

编辑: v0.2 - 添加了基本ID提取

根据提供的屏幕上限,我设法编写了一个程序,可以正确提取四个字段Last First MiddleRank的值并将它们输出到新表:

'============================================================================================
' Module     : <in any standard module>
' Version    : 0.2
' Part       : 1 of 1
' References : Microsoft Scripting Runtime
' Source     : https://stackoverflow.com/a/46166984/1961728
'============================================================================================
Private Enum i_
    ž__NONE = 0
  ID
  Last
  First
  Middle
  Rank
    ž__
    ž__FIRST = ž__NONE + 1
    ž__LAST = ž__ - 1
End Enum

Public Sub ExtractFieldValues()

  Const l_Table_1     As String = "Table 1"
  Const l_FieldValues As String = "FieldValues"
  Const l_last_first_middle As String = "last first middle"
  Const s_FieldNames        As String = "id " & l_last_first_middle & " rank"
  Const n_OutputRowsPerRecord As Long = 6

  Dim ƒ As Excel.WorksheetFunction: Set ƒ = Excel.WorksheetFunction
  Dim ¡ As Long

  With Worksheets
    On Error Resume Next
    .Add(After:=.Item(.Count)).Name = l_FieldValues
    On Error GoTo 0
    Application.DisplayAlerts = False
      If .Item(.Count).Name <> l_FieldValues Then
        .Item(.Count).Delete
        .Item(l_FieldValues).UsedRange.Clear
      End If
      .Item(l_FieldValues).Columns(1).NumberFormat = "@"
    Application.DisplayAlerts = True
    .Item(l_Table_1).Activate
  End With

  Dim astrFieldNames() As String
  astrFieldNames = Split(" " & s_FieldNames, " ") ' Force index zero to a blank -> treat as base 1
  Dim dictFields As Scripting.Dictionary '##Late Binding: CreateObject("Scripting.Dictionary")
  Set dictFields = New Scripting.Dictionary '##Late Binding: As Object
  With dictFields
    .CompareMode = TextCompare
    For ¡ = i_.ž__FIRST To i_.ž__LAST
      dictFields.Add astrFieldNames(¡), ""
    Next ¡
  End With
  Dim lngLastUsedRow As Long
  lngLastUsedRow _
  = Cells _
     .Find _
      ( _
        What:="*" _
      , After:=Cells(1) _
      , LookIn:=xlFormulas _
      , Lookat:=xlPart _
      , SearchOrder:=xlByRows _
      , SearchDirection:=xlPrevious _
      ) _
     .Row

  With Range(Rows(1), Rows(lngLastUsedRow))

    Dim arngFoundCells(i_.ž__FIRST To i_.ž__LAST) As Range
    For ¡ = i_.ž__FIRST To i_.ž__LAST
      Set arngFoundCells(¡) = .Find(What:=astrFieldNames(¡), After:=Cells(1))
    Next ¡
    Dim lngFirstFoundRow As Long
    lngFirstFoundRow _
    = ƒ.Min _
        ( _
          arngFoundCells(i_.Last).Row _
        , arngFoundCells(i_.First).Row _
        , arngFoundCells(i_.Middle).Row _
        )
    Dim lngOuputSheetNextRow As Long
    lngOuputSheetNextRow = 1

    Dim varFoundCell As Variant
    Dim lngNextFoundRow As Long
    Dim rngNextFindStart As Range
    Dim astrSplitValues() As String
    Dim strFoundValue As String
    Dim lngFieldCount As Long
    Do
      For ¡ = i_.ž__FIRST To i_.ž__LAST
'        Debug.Print arngFoundCells(¡).Address; " ";
        dictFields.Item(astrFieldNames(¡)) = ""
      Next ¡
'      Debug.Print
      Select Case True
        Case arngFoundCells(i_.First).Row = arngFoundCells(i_.Middle).Row:
          ' Edge case: missing rank (found rank is for next employee) -> copy first to rank (simplifies following code)
          If arngFoundCells(i_.Rank).Row <> arngFoundCells(i_.First).Row Then
            Set arngFoundCells(i_.Rank) = arngFoundCells(i_.First)
          End If
          For Each varFoundCell In arngFoundCells
            strFoundValue = ƒ.Trim(Replace(Replace(varFoundCell.Value2, vbLf, " "), ":", "")) & " "
            If strFoundValue Like "[']*" Then strFoundValue = Mid$(strFoundValue, 2)
            ' ID field: only retain the first word of value
            If LCase$(strFoundValue) Like astrFieldNames(i_.ID) & "*" Then
              strFoundValue = Left$(strFoundValue, InStr(InStr(strFoundValue, " ") + 1, strFoundValue, " "))
            End If
            ' Edge case: no last name value in merged cell -> assume value is in first cell of following row
            If LCase$(strFoundValue) Like astrFieldNames(i_.Last) & " " Then
              strFoundValue = ƒ.Trim(strFoundValue & Rows(varFoundCell.Row + 1).Cells(1).Value2) & " "
            End If
            ' Edge case: Field names only in row -> assume field values are on the following row
            If LCase$(strFoundValue) Like l_last_first_middle & "*" _
            And Len(strFoundValue) - Len(Replace(strFoundValue, " ", "")) < 5 _
            Then
              strFoundValue = ƒ.Trim(strFoundValue & Rows(varFoundCell.Row + 1).Cells(1).Value2) & " "
            End If
            astrSplitValues = Split(" " & strFoundValue, " ") ' Force index zero to a blank -> treat as base 1
            ' Array contains one/some/all field names first and then the values (with a possible extra blank value)
            lngFieldCount = Int(UBound(astrSplitValues) / 2)
            For ¡ = 1 To lngFieldCount
              dictFields.Item(astrSplitValues(¡)) = astrSplitValues(¡ + lngFieldCount)
            Next ¡
          Next varFoundCell
          ' Only allow the id to be on the previous row
          If arngFoundCells(i_.ID).Row <> arngFoundCells(i_.First).Row - 1 Then
            dictFields.Item(astrFieldNames(i_.ID)) = 0
          End If
        Case Else
          Debug.Print "  SKIPPED: ";
          For ¡ = i_.ž__FIRST To i_.ž__LAST
            Debug.Print arngFoundCells(¡).Address; " ";
          Next ¡
          Debug.Print
          For ¡ = i_.ž__FIRST To i_.ž__LAST
            Debug.Print "    "; ƒ.Trim(arngFoundCells(¡).Value2)
          Next ¡
          Debug.Print
      End Select
      Sheets(l_FieldValues).Columns(1).Cells(lngOuputSheetNextRow).Resize(n_OutputRowsPerRecord - 1).Value _
      = ƒ.Transpose(dictFields.Items)
      lngOuputSheetNextRow = lngOuputSheetNextRow + n_OutputRowsPerRecord
      Set rngNextFindStart = Rows(arngFoundCells(i_.First).Row + 2).Cells(1)
      For ¡ = i_.ž__FIRST To i_.ž__LAST
        Set arngFoundCells(¡) = .Find(What:=astrFieldNames(¡), After:=rngNextFindStart)
      Next ¡
      lngNextFoundRow _
      = ƒ.Min _
          ( _
            arngFoundCells(i_.Last).Row _
          , arngFoundCells(i_.First).Row _
          , arngFoundCells(i_.Middle).Row _
          )
    Loop While lngNextFoundRow <> lngFirstFoundRow

  End With

End Sub

我预计会有一些遗漏的边缘案例。希望这些将显示在VBE的即时窗口中。