将CSV分层数据文件导入Neo4j.py

时间:2010-09-07 10:11:11

标签: neo4j

我想知道是否有一个Python脚本可以将csv分层数据文件导入到neo4j.py。

我写了下面这个Python脚本,但是我需要一种更好的方法,在创建节点之前先检查该节点是否已经存在。

# Import hierarchical CSV rows (sector, category, product, brand, subbrand)
# into Neo4j. Each distinct path prefix becomes exactly one node, linked to
# its level root via OFTYPE and to its parent node via MEMBEROF.
graphdb = neo4j.GraphDatabase(lparam_database_path, classpath=lparam_kernel_class_path, jvm=lparam_jvm_path, heap_size=ls_jvm_heap_size)

try:
    with graphdb.transaction:
        # One root node per hierarchy level.
        # NOTE(review): Subreference.Node.SECTOR_ROOT presumably gets or
        # creates a well-known subreference node -- confirm against the
        # neo4j.py documentation.
        sectornode = Subreference.Node.SECTOR_ROOT(graphdb, label="Sectors", level=1)
        categorynode = graphdb.node(label="Categories", level=2)
        productnode = graphdb.node(label="Products", level=3)
        brandnode = graphdb.node(label="Brands", level=4)
        subbrandnode = graphdb.node(label="Subbrands", level=5)

        # Chain the level roots together: sector -> category -> ... -> subbrand.
        sectornode.CATEGORY_ROOT(categorynode)
        categorynode.PRODUCT_ROOT(productnode)
        productnode.BRAND_ROOT(brandnode)
        brandnode.SUBBRAND_ROOT(subbrandnode)

        # Level roots in column order; column i of a row belongs to level_roots[i].
        level_roots = (sectornode, categorynode, productnode, brandnode, subbrandnode)

        # Maps a full path prefix, e.g. (sector, category, product), to the node
        # already created for it. Keying on the whole prefix keeps equally named
        # items under different parents apart, and -- unlike comparing against
        # the previous row only -- also works when data_list is not sorted.
        nodes_by_path = {}

        for li_index, data in enumerate(data_list, 1):
            print("importing row %s of %s" % (li_index, li_total_rows))

            path = ()
            parentnode = None
            for depth, level_root in enumerate(level_roots):
                # Indexing (rather than slicing) makes a short row raise
                # IndexError instead of being silently accepted.
                path = path + (data[depth],)
                if path not in nodes_by_path:
                    node = graphdb.node(label=data[depth])
                    # Relationships are created only together with the node,
                    # so repeated rows no longer produce duplicate
                    # OFTYPE / MEMBEROF relationships.
                    node.OFTYPE(level_root)
                    if parentnode is not None:
                        node.MEMBEROF(parentnode)
                    nodes_by_path[path] = node
                parentnode = nodes_by_path[path]
finally:
    # Always release the database, even if the import fails part-way.
    graphdb.shutdown()

1 个答案:

答案 0 :(得分:1)

您应该可以在创建节点之后为其建立索引,然后在下次创建之前通过索引API检查该节点是否已经存在。我手头没有neo4j.py的API文档,但它应该提供了这个功能。

基本做法(以Java为例)是:

// Obtain the node index by name; the returned handle must be kept,
// because the calls below go through it.
Index<Node> index = graphDatabase.index().forNodes("index-name");
// Right after creating a node, register it under a key/value pair.
index.add(node, "key","value");

// Before creating a node, look it up first; per the Neo4j index API,
// getSingle() yields null when nothing matches.
Node node=index.get("key","value").getSingle();