Question

我正在运行一个脚本，用来比较从若干 JSON 中提取出的两个数组。脚本可以正常运行，但完全没有经过优化。我需要有关 if 条件部分的帮助，想知道是否有更好的写法（我 100% 确定有）。非常感谢您的帮助！

def compare(id1, id2, nb):
    """Return a positional similarity score for two indices.

    The score is 1 minus the absolute distance between ``id1`` and ``id2``
    divided by ``nb``; equal indices therefore score exactly 1.
    """
    distance = abs(id2 - id1)
    return 1 - distance / nb

searchList = ['student', 'runner', 'fireman', 'chief', 'teacher', 'seller', 'fisher', 'policeman']

# NOTE: data1, data2, nb and res are not defined in this snippet; they are
# assumed to be set up earlier in the script.
for e in searchList:
    # Compare every item of data1[e] with every item of data2[e]. The field
    # used to decide whether two items are "the same" depends on the category.
    for id1, item1 in enumerate(data1[e]['items']):
        for id2, item2 in enumerate(data2[e]['items']):
            if e == 'runner':
                if item1['details']['profile_link'] == item2['details']['profile_link']:
                    res += compare(id1, id2, nb)
            elif e == 'policeman' or e == 'products':
                if item1['title'] == item2['title']:
                    res += compare(id1, id2, nb)
            elif e == 'fisher':
                if item1['description'] == item2['description']:
                    res += compare(id1, id2, nb)
            elif e == 'chief':
                # Compare item1 against item2. Comparing item1 with itself
                # is always true and would count every pair as a match.
                if item1['program']['one'] and item2['program']['one']:
                    if item1['program']['one']['title'] == item2['program']['one']['title']:
                        res += compare(id1, id2, nb)
                elif item1['program']['two'] and item2['program']['two']:
                    if item1['program']['two']['title'] == item2['program']['two']['title']:
                        res += compare(id1, id2, nb)
            else:
                # Every remaining category is matched on its profile link.
                if item1['profile_link'] == item2['profile_link']:
                    res += compare(id1, id2, nb)

# Scale the accumulated score once all categories have been processed.
res = res * 1000

非常感谢您！

编辑＃1：

感谢您的帮助，

这是我的json结构的一个示例：

{
    "student": {
        "count": 1,
        "items": [
            {
                "index": 0,
                "profile_link": "xxx"
            }
        ]
    },
    "runner": {
        "count": 1,
        "items": [
            {
                "index": 0,
                "details": [
                    {
                        "profile_link": "xxx"
                    }
                ]
            }
        ]
    },
    "policeman": {
        "count": 1,
        "items": [
            {
                "index": 0,
                "title": "xxx"
            }
        ]
    },
    "fisher": {
        "count": 1,
        "items": [
            {
                "index": 0,
                "description": "xxx"
            }
        ]
    },
    "chief": {
        "count": 1,
        "items": {
            "program": {
                "one": [
                    {
                        "index": 0,
                        "title": "xxx"
                    }
                ],
                "two": [
                    {
                        "index": 0,
                        "title": "xxx"
                    }
                ]
            }
        }
    },
    "fireman": {
        "count": 1,
        "items": [
            {
                "index": 0,
                "profile_link": "xxx"
            }
        ]
    },
    "teacher": {
        "count": 1,
        "items": [
            {
                "index": 0,
                "profile_link": "xxx"
            }
        ]
    },
    "seller": {
        "count": 1,
        "items": [
            {
                "index": 0,
                "profile_link": "xxx"
            }
        ]
    }
}

Answer 1

效率最低的地方在于 O(n*m) 的运行时复杂度，其中 n 是 data1 中的项目数量，m 是 data2 中的项目数量。您可以通过以下方式将其降为 O(n+m)：先用 data1 的所有项填充一个哈希表（字典），然后对 data2 的每一项直接查找与之相等的元素。

尝试一下：

# Runner
# Map each profile link to the indices at which it occurs in data1, so that
# matching against data2 costs one dict lookup per item instead of a nested scan.
data1_ids_by_key = {}
for id1, item1 in enumerate(data1['runner']['items']):
    data1_ids_by_key.setdefault(item1['details']['profile_link'], []).append(id1)
for id2, item2 in enumerate(data2['runner']['items']):
    key = item2['details']['profile_link']
    # Iterate over the stored indices: the loop variable left over from the
    # first loop would always be the last index of data1.
    for id1 in data1_ids_by_key.get(key, ()):
        res += compare(id1, id2, nb)

# Policeman
# Map each title to the indices at which it occurs in data1, so that matching
# against data2 costs one dict lookup per item instead of a nested scan.
data1_ids_by_key = {}
for id1, item1 in enumerate(data1['policeman']['items']):
    data1_ids_by_key.setdefault(item1['title'], []).append(id1)
for id2, item2 in enumerate(data2['policeman']['items']):
    key = item2['title']
    # Iterate over the stored indices: the loop variable left over from the
    # first loop would always be the last index of data1.
    for id1 in data1_ids_by_key.get(key, ()):
        res += compare(id1, id2, nb)

我如何优化此循环？带有JSON的Python代码

1 个答案: