networkx搭配Neo4j

为了完成较大规模的图分析工作,networkx搭配Neo4j是一种很好的组合。

一、往Neo4j中导入数据

下面的脚本使用 py2neo 和 pandas,把 Excel 文件中的节点与关系数据导入 Neo4j:
from py2neo import Graph, Node, Relationship, NodeMatcher, RelationshipMatcher
import pandas as pd

#========================================================
# Connect to the graph database
#========================================================

# Connect to Neo4j with its address and credentials. The credentials are
# passed as ``auth=(user, password)``, one of py2neo's documented
# connection settings.
graph = Graph('http://localhost:7474', auth=('neo4j', 'test'))
node_matcher = NodeMatcher(graph)
relationship_matcher = RelationshipMatcher(graph)

#========================================================
# Load the data from a local file
#========================================================

# The sheet is read without a header row, so the positional columns 0-4
# are given descriptive names here.
data = pd.read_excel('neo4j_data.xlsx', header=None)
data.rename(
    columns={
        0: 'node1_name',
        1: 'node1_label',
        2: 'node2_name',
        3: 'node2_label',
        4: 'relationship',
    },
    inplace=True,
)

#========================================================
# Clear the graph
#========================================================

# When True, every node and relationship already in the database is removed
# before the import starts.
NEED_DELETE_BEFORE = True

if NEED_DELETE_BEFORE:
    graph.delete_all()

#========================================================
# Build the graph structure
#========================================================


def _get_or_create_node(label, name):
    """Return the node with this label and ``name`` property, creating it if absent."""
    node = node_matcher.match(label, name=name).first()
    # ``first()`` yields None when nothing matches; test for that explicitly
    # instead of relying on the truthiness of a matched entity.
    if node is None:
        node = Node(label, name=name)
        graph.create(node)
    return node


# Each row describes one edge: (node1) -[relationship]-> (node2).
for row in data.itertuples(index=False):
    source = _get_or_create_node(row.node1_label, row.node1_name)
    target = _get_or_create_node(row.node2_label, row.node2_name)

    # Only create the relationship if the same typed edge does not exist yet,
    # so repeated rows do not produce duplicate edges.
    r = relationship_matcher.match(nodes=(source, target), r_type=row.relationship).first()
    if r is None:
        r = Relationship(source, row.relationship, target, weight=1)
        graph.create(r)

# Quick sanity check: print the names of up to ten ``city`` nodes.
d = graph.run("MATCH (a:city) RETURN a.name LIMIT 10").data()
print(d)

二、从Neo4j中读取数据

下面的脚本从 Neo4j 中读取 city 节点之间的 to 关系,并用 networkx 构建有向图后绘制出来:
from py2neo import Graph
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

#========================================================
# Connect to the graph database
#========================================================

# Connect to Neo4j with its address and credentials. The credentials are
# passed as ``auth=(user, password)``, one of py2neo's documented
# connection settings.
graph = Graph('http://localhost:7474', auth=('neo4j', 'test'))

#========================================================
# Build the subgraph
#========================================================
G = nx.DiGraph()

# One record per directed ``to`` edge between two ``city`` nodes.
df = pd.DataFrame(
    graph.run("MATCH (a:city)-[:to]->(b:city) RETURN a.name as source, b.name as target").data()
)

# itertuples() yields nothing for an empty result, so a query with no
# matches simply leaves G empty.
for row in df.itertuples(index=False):
    G.add_node(row.source, node_type='city')
    G.add_node(row.target, node_type='city')
    G.add_edge(row.source, row.target)

# nx.info() was removed in NetworkX 3.0, so the summary is built from the
# node and edge counts directly.
print(f"{type(G).__name__} with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges")
nx.draw_networkx(G)
plt.show()
0%