数据框交替数组

时间:2019-07-29 08:55:58

标签: python excel vba pandas

我目前有一个Excel文件,其中包含以下信息:

Company Initial     Purchase Number
ABCD            A123456789
ABCD            B123456789
BCDE            C123456789
BCDE            D123456789
BCDE            E123456789
CDEF            F123456789
DEFG            G123456789
DEFG            H123456789
DEFG            I123456789
DEFG            J123456789
DEFG            K123456789

我想把它重新排列成一张表格,使同一公司的缩写不会连续出现。

Company Initial     Purchase Number
DEFG            K123456789
ABCD            A123456789
DEFG            G123456789
ABCD            B123456789
DEFG            J123456789
BCDE            C123456789
DEFG            I123456789
BCDE            D123456789
DEFG            H123456789
BCDE            E123456789
CDEF            F123456789

我目前正在用Pandas和VBA尝试实现这一点,但始终理不清思路。我也愿意使用其他Python库。

谢谢。

4 个答案:

答案 0 :(得分:1)

借助内存中的列表框的另一种可能性:

Assert.assertEquals("", actu<cursor>al);

答案 1 :(得分:0)

这里没有保证的解决方案;如果有10个ABCD和1个DEFG,则无法完成。考虑到这一点,有一些方法并不是最优的,但至少会给它一个公平的机会。
简而言之:

  1. 创建列表列表,其中子列表仅包含一个公司
  2. 按每个子列表中的条目数对主列表进行排序(降序,条目最多的子列表排在最前)
  3. 将第一个条目从第一个列表移至目标列表
  4. 将第一个条目从第二个列表移至目标列表
  5. 从3开始重复

答案 2 :(得分:0)

这变得非常笨拙,但是以某种方式在有限的测试中起作用。可以尝试看看它是否可以承受扩展测试

    Option Explicit
    ' arrangeArray: reads a two-column block (key, value) from A1:B12, counts how
    ' often each key occurs, orders the keys by descending count and writes the
    ' rows back starting at D1, emitting at most two rows per pass over the keys.
    '
    ' "As Dictionary" / "New Dictionary" is early-bound, so the VBA project needs
    ' a reference to "Microsoft Scripting Runtime".
    Sub arrangeArray()
    Dim Arr As Variant, Rslt As Variant, Dict As Dictionary
    Dim MxCnt As Long, i As Long, j As Long, MxKey As String, Rw As Long
    Dim Ky As String, PosInArr As Long, ArrLen As Long, RwCnt As Long
    Dim temp1 As Variant, temp2 As Variant
    Set Dict = New Dictionary
    ' NOTE(review): the input range is hard-coded and unqualified (so it is read
    ' from whichever sheet is active). The posted sample has 11 data rows while
    ' the range covers 12 rows - confirm the range matches the data; the
    ' duplicated K123456789 row in the posted output may stem from this.
    Arr = Range("A1:B12").Value
    ReDim Rslt(1 To UBound(Arr, 1), 1 To 2)
    ArrLen = UBound(Arr, 1)

     MxKey = ""
     MxCnt = 0
        'Count occurrences per company name (dictionary key -> count) and track
        'the most frequent key (MxKey) together with its count (MxCnt)
        For i = 1 To ArrLen
        Ky = Arr(i, 1)
            If Dict.Exists(Ky) Then
            Dict(Ky) = Dict(Ky) + 1
            Else
            Dict.Add Ky, 1
            End If

            If MxCnt < Dict(Ky) Then
            MxKey = Ky
            MxCnt = Dict(Ky)
            End If
        Next


        'Feasibility check: the MxCnt rows of the most frequent key need at least
        'MxCnt - 1 rows of other keys between them; otherwise report and stop
        If ArrLen - MxCnt < MxCnt - 1 Then
        MsgBox " it is not possible to Arrange Array Since Total remaining Company names other than " & MxKey & " (occurs " & MxCnt & " times ) is only " & ArrLen - MxCnt & " less than " & MxCnt - 1
        Exit Sub
        End If

        'Copy the dictionary into the 2-D array arr2 (column 1 = key,
        'column 2 = count) so that it can be sorted
        i = Dict.Count
        Dim arr2 As Variant
        ReDim arr2(1 To i, 1 To 2)
        For i = 1 To Dict.Count
            arr2(i, 1) = Dict.Keys(i - 1)
            arr2(i, 2) = Dict.Items(i - 1)
        Next i

        'Sort arr2 by count, descending (pairwise exchange of both columns)
        For i = 1 To UBound(arr2, 1) - 1
            For j = i + 1 To UBound(arr2, 1)
                If arr2(i, 2) < arr2(j, 2) Then
                    temp1 = arr2(j, 1)
                    temp2 = arr2(j, 2)
                    arr2(j, 1) = arr2(i, 1)
                    arr2(j, 2) = arr2(i, 2)
                    arr2(i, 1) = temp1
                    arr2(i, 2) = temp2
                End If
            Next j
        Next i

        'From here on column 2 of arr2 no longer holds the count: it is reused
        'as a cursor holding the row index in Arr of the key's first occurrence
        '(0 = no occurrence left)
        For i = 1 To Dict.Count
        Ky = arr2(i, 1)
        arr2(i, 2) = 0
            For j = 1 To ArrLen
                If Arr(j, 1) = Ky Then
                arr2(i, 2) = j   'First available position of the key in Arr
                Exit For
                End If
            Next
        Next i

'Build the result array: every pass of the Do loop walks the keys in sorted
'order and copies the next unused row of each key that still has one
    Rw = 1
    Do
        RwCnt = 0
        For i = 1 To Dict.Count
        Ky = arr2(i, 1)
        PosInArr = arr2(i, 2)
            If PosInArr > 0 Then
            Rslt(Rw, 1) = Ky
            Rslt(Rw, 2) = Arr(PosInArr, 2)
            Rw = Rw + 1
            RwCnt = RwCnt + 1
            arr2(i, 2) = 0
                'Advance the cursor to the next occurrence of Ky in Arr; it stays 0
                'when the key is exhausted
                    For j = PosInArr + 1 To ArrLen
                        If Arr(j, 1) = Ky Then
                        arr2(i, 2) = j     'next available position of the key in Arr
                        Exit For
                        End If
                     Next j

            'Stop as soon as the result array is full
            If Rw > ArrLen Then Exit For  
            'After two emitted rows restart the scan from the most frequent key
            If RwCnt = 2 Then Exit For   ' exit to next Do loop after two rows
            End If
        Next i
    If Rw > ArrLen Then Exit Do
    Loop

    'Write the rearranged rows next to the input, starting at D1
    Range("D1").Resize(UBound(Rslt, 1), 2).Value = Rslt

    End Sub

结果就像

ABCD    A123456789      DEFG    G123456789
ABCD    B123456789      BCDE    C123456789
BCDE    C123456789      DEFG    H123456789
BCDE    D123456789      BCDE    D123456789
BCDE    E123456789      DEFG    I123456789
CDEF    F123456789      BCDE    E123456789
DEFG    G123456789      DEFG    J123456789
DEFG    H123456789      ABCD    A123456789
DEFG    I123456789      DEFG    K123456789
DEFG    J123456789      ABCD    B123456789
DEFG    K123456789      DEFG    K123456789
DEFG    K123456789      CDEF    F123456789

答案 3 :(得分:0)

对于那些感兴趣的人,我想出了使用Pandas的Python解决方案

import pandas as pd
import sys

def interleave_by_company(df, column="Company Initial"):
    """Return the rows of *df* reordered so equal *column* values are spread out.

    Groups are processed from the most frequent value to the least frequent
    one (the order produced by ``Series.value_counts``).  The current
    "primary" group is alternated with each of the remaining "secondary"
    groups; rows are taken from the end of each group towards its start.
    When only the primary group has rows left they are emitted back to back,
    so a non-adjacent result is not guaranteed for every input.

    Args:
        df: Source frame containing *column*.
        column: Name of the column whose equal values should not be adjacent.

    Returns:
        A new DataFrame with the same columns and a fresh 0..n-1 index.
    """
    counts = df[column].value_counts()
    # Mutable [key, rows_left] pairs, most frequent key first.
    remaining = [[key, int(count)] for key, count in counts.items()]
    groups = {key: df[df[column] == key] for key, _ in remaining}

    # Collect one-row slices and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copied the whole
    # frame on every call.
    pieces = []

    def take(entry):
        """Move the last unused row of the entry's group to the output."""
        key, left = entry
        pieces.append(groups[key].iloc[left - 1:left])
        entry[1] = left - 1
        return key

    last_secondary = ""  # key of the most recently emitted secondary row
    for position, primary in enumerate(remaining):
        others = remaining[position + 1:]  # shares the inner [key, left] lists
        while primary[1] > 0:
            # Failsafe: no other group has rows left, so flush the primary.
            if all(other[1] == 0 for other in others):
                while primary[1] > 0:
                    take(primary)
                break

            for other in others:
                if primary[1] == 0:
                    break
                if other[1] > 0:
                    # Skip the primary row when the previous secondary row
                    # already belongs to this group (it used to be secondary).
                    if last_secondary != primary[0]:
                        take(primary)
                    last_secondary = take(other)

    if not pieces:
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(pieces).reset_index(drop=True)


def main():
    """Read ``<argv[1]>\\data\\Book1.xlsx`` and print the interleaved frame."""
    df = pd.read_excel(sys.argv[1] + "\\data\\Book1.xlsx")
    print(interleave_by_company(df))


if __name__ == "__main__":
    main()

这将创建以下数据框:

   Company Initial Purchase Number
0             DEFG      K123456789
1             BCDE      E123456789
2             DEFG      J123456789
3             ABCD      B123456789
4             DEFG      I123456789
5             CDEF      F123456789
6             DEFG      H123456789
7             BCDE      D123456789
8             DEFG      G123456789
9             ABCD      A123456789
10            BCDE      C123456789
相关问题