Python知识图谱助力数据分析的价值与落地路径
一、核心价值
二、典型应用场景
三、端到端落地流程与Python工具
四、最小可行示例
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# Minimal knowledge-graph demo: build a small graph from (head, relation, tail)
# triples, print basic statistics, draw it, then cluster the nodes using a
# spectral embedding derived from the graph Laplacian.

# 1) Build the triples
triples = [
    ('drugA', 'treats', 'fever'), ('drugB', 'treats', 'hepatitis'),
    ('drugC', 'treats', 'bleeding'), ('drugD', 'treats', 'pain'),
    ('drugA', 'inhibits', 'gene1'), ('drugC', 'inhibits', 'gene2'),
    ('drugD', 'inhibits', 'gene4'), ('drugE', 'inhibits', 'gene20'),
    ('gene1', 'associated', 'obesity'), ('gene2', 'associated', 'heart_attack'),
    ('gene3', 'associated', 'hepatitis'), ('gene4', 'associated', 'bleeding'),
    ('gene50', 'associated', 'cancer'), ('gene2', 'interacts', 'gene1'),
    ('gene3', 'interacts', 'gene20'), ('gene4', 'interacts', 'gene50'),
]
df = pd.DataFrame(triples, columns=['head', 'relation', 'tail'])

# 2) Build the graph
# The graph is undirected, so the head -> tail direction of each triple is
# dropped; the relation name is kept as the edge attribute 'label'.
G = nx.Graph()
for head, relation, tail in df.itertuples(index=False, name=None):
    G.add_edge(head, tail, label=relation)

# 3) Basic statistics
print(f"节点数: {G.number_of_nodes()}, 边数: {G.number_of_edges()}")

# 4) Simple visualization
# A fixed seed keeps the layout reproducible; the same `pos` is reused for the
# clustering plot below so both figures are directly comparable.
pos = nx.spring_layout(G, seed=42, k=0.9)
edge_labels = nx.get_edge_attributes(G, 'label')
plt.figure(figsize=(10, 8))
nx.draw(G, pos, with_labels=True, node_color='skyblue', node_size=700, edge_color='gray', alpha=0.6)
# The graph comes first and the layout second; the original call had the two
# swapped, which fails because a position dict is not a graph.
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels, font_size=8)
plt.title("Mini Knowledge Graph")
plt.show()

# 5) Adjacency-matrix based embedding and clustering (for demonstration only)
# Passing an explicit nodelist pins the row order of A to the order of G.nodes,
# so labels[i] below refers to the i-th node that nx.draw colours.
A = nx.to_numpy_array(G, nodelist=list(G.nodes))
D = np.diag(A.sum(axis=1))
L = D - A  # unnormalized graph Laplacian
eigvals, eigvecs = np.linalg.eigh(L)  # eigenvalues are returned in ascending order
# Use columns 1-2 (the 2nd and 3rd smallest eigenvalues) as a 2-D embedding.
# NOTE: the Laplacian has one zero eigenvalue per connected component. This
# sample graph has two components, so column 1 still lies in the null space
# and is not a true "non-trivial" eigenvector; only on a connected graph are
# columns 1-2 the first two non-trivial ones.
X = eigvecs[:, 1:3]
# DBSCAN assigns the label -1 to points it treats as noise.
labels = DBSCAN(eps=0.5, min_samples=2).fit_predict(X)
plt.figure(figsize=(10, 8))
nx.draw(G, pos, with_labels=True, node_color=labels, cmap=plt.cm.Set1, node_size=700, edge_color='gray', alpha=0.6)
plt.title("Graph Clustering with DBSCAN on Laplacian Eigenmaps")
plt.show()
五、实施建议
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。