您的位置:首页 > 理论基础 > 计算机网络

【复杂网络社团发现】GN算法步骤详解(附python代码实现)

2020-01-13 14:01 696 查看

GN算法

作为一种分裂的层次聚类方法,GN算法通过不断从原图中删除边从而达到生成分裂树的目的(从而形成社团)

那么关键点就在于,如何评判删除标准了(本文暂仅详述步骤,对于这一点不作详细解答)

不带权

通过不断删除边介数最大的边并循环(每次都重新计算),直到图中没有边存在

步骤:

  1. 计算网络内所有边相对于源节点的边介数
  2. 删除边介数最高的边;
  3. 重复步骤1和步骤2;
  4. 直到网络中没有边存在,最后生成的分裂树即为划分的社团;

代码实现

python
源码
# -*- coding: utf-8 -*-

import networkx as nx
import matplotlib.pyplot as plt
import sys
sys.path.append('../')

class GN:
def __init__(self, G):
self.G_copy = G.copy()
self.G = G
self.partition = [[n for n in G.nodes()]]
self.all_Q = [0.0]
self.max_Q = 0.0
#         self.zidian={0.0:[100]}
self.zidian={0:[0]}
#     def GG(self):
#         return self.G_copy

#Using max_Q to divide communities
def run(self):
#Until there is no edge in the graph
while len(self.G.edges()) != 0:
#Find the most betweenness edge
edge = max(nx.edge_betweenness_centrality(self.G).items(),key=lambda item:item[1])[0]
#Remove the most betweenness edge
self.G.remove_edge(edge[0], edge[1])
#List the the connected nodes
components = [list(c) for c in list(nx.connected_components(self.G))]
if len(components) != len(self.partition):
#compute the Q
cur_Q = self.cal_Q(components, self.G_copy)
if cur_Q not in self.all_Q:
self.all_Q.append(cur_Q)
if cur_Q > self.max_Q:
self.max_Q = cur_Q
self.partition = components
for i in range(len(self.partition)):
self.zidian[i]=self.partition[i]
print('-----------the Divided communities and the Max Q-----------')
print('Max_Q:', self.max_Q)
print('The number of Communites:', len(self.partition))
print("Communites:", self.partition)
return self.partition

def cal_Q(self,partition,G):
m = len(G.edges(None, False))
a = []
e = []
for community in partition:
t = 0.0
for node in community:
t += len([x for x in G.neighbors(node)])
a.append(t/(2*m))
#             self.zidian[t/(2*m)]=community
for community in partition:
t = 0.0
for i in range(len(community)):
for j in range(len(community)):
if(G.has_edge(community[i], community[j])):
t += 1.0
e.append(t/(2*m))

q = 0.0
for ei,ai in zip(e,a):
q += (ei - ai**2)
return q

def add_group(self):
num = 0
nodegroup = {}
for partition in self.partition:
for node in partition:
nodegroup[node] = {'group':num}
num = num + 1
nx.set_node_attributes(self.G_copy, nodegroup)

def to_gml(self):
nx.write_gml(self.G_copy, 'outtoGN.gml')

if __name__ == '__main__':
G=nx.read_gml('data.gml',label='id') #Using max_Q to divide communities
algorithm = GN(G)
algorithm.run()
algorithm.add_group()
algorithm.to_gml()
数据集
data.gml
格式
graph
[
node
[
id "a"
]
node
[
id "b"
]
edge
[
source "a"
target "b"
]
]
注意
  1. 缩进无影响
  2. 需为gml文件
附一段格式生成傻瓜代码
之所以说是傻瓜代码,因为还得将data.txt中全选copy到data.gml中(小声bb)
with open('data.txt','w') as f:
f.write("graph\n")
f.write("[\n")
for i in range(len(vertex)):
f.write("node\n")
f.write("[\n")
f.write("id \""+vertex[i]+"\"\n")
f.write("]\n")
for i in range(len(edges)):
f.write("edge\n")
f.write("[\n")
f.write("source \""+edges[i][0]+"\"\n")
f.write("target \""+edges[i][1]+"\"\n")
f.write("]\n")
f.write("]")

其中vertex为节点集,edges为边集

带权

通过不断删除边权比最大的边并循环(每次都重新计算),直到图中没有边存在

步骤:

  1. 计算网络内所有边相对于源节点的边介数
  2. 将每条边的边介数除以其权值得到边权比
  3. 删除边权比最高的边;
  4. 重复步骤1、步骤2和步骤3;
  5. 直到网络中没有边存在,最后生成的分裂树即为划分的社团;

代码实现

python
源码
# -*- coding: utf-8 -*-
#GN_weight
import networkx as nx
import matplotlib.pyplot as plt
import sys
sys.path.append('../')

class GN_w:
def __init__(self, G):
self.G_copy = G.copy()
self.G = G
self.partition = [[n for n in G.nodes()]]
self.all_Q = [0.0]
self.max_Q = 0.0

#Using max_Q to divide communities
def run(self):
while len(self.G.edges()) != 0:
edges={}
edges_betweenness_centrality = nx.edge_betweenness_centrality(self.G)

for e, ebc in edges_betweenness_centrality.items():
edge_weight = ebc/self.G.get_edge_data(e[0],e[1])['value']
edges[e]=edge_weight

edge = max(edges.items(), key=lambda item:item[1])[0]
self.G.remove_edge(edge[0], edge[1])
components = [list(c) for c in list(nx.connected_components(self.G))]
if len(components) != len(self.partition):
#compute the Q
cur_Q = self.cal_Q(components, self.G_copy)
if cur_Q not in self.all_Q:
self.all_Q.append(cur_Q)
if cur_Q > self.max_Q:
self.max_Q = cur_Q
self.partition = components

print('-----------the divided communities and the Max Q------------')
print('The number of Communites:', len(self.partition))
print('Max_Q:', self.max_Q)
print(self.partition)
return self.partition, self.all_Q, self.max_Q

def group(self,node1):
num1 = 0
nodegroup1 = {}
for partition in self.partition:
for node in partition:
nodegroup1[node] = {'group':num1}
num1 = num1 + 1
nx.set_node_attributes(self.G_copy, nodegroup1)
for partition in self.partition:
for node in partition:
if node==node1:
return nodegroup1[node]['group']
else:pass

def add_group(self):
num = 0
nodegroup = {}
for partition in self.partition:
for node in partition:
nodegroup[node] = {'group':num}
num = num + 1
nx.set_node_attributes(self.G_copy, nodegroup)
return self.partition

def to_gml(self):
nx.write_gml(self.G_copy, 'outtoGN_weighted.gml')

#Computing the Q
def cal_Q(self,partition,G):
m = len(G.edges(None, False))
a = []
e = []

for community in partition:
t = 0.0
for node in community:
t += len([x for x in G.neighbors(node)])
a.append(t/(2*m))

for community in partition:
t = 0.0
for i in range(len(community)):
for j in range(len(community)):
if(G.has_edge(community[i], community[j])):
t += 1.0
e.append(t/(2*m))

q = 0.0
for ei,ai in zip(e,a):
q += (ei - ai**2)
return q

if __name__ == '__main__':
G=nx.read_gml('data.gml')

algorithm = GN_w(G)
algorithm.run()
zidian=algorithm.add_group()
algorithm.to_gml()
数据集
data.gml
格式
graph
[
node
[
id 1
label "a"
]
node
[
id 2
label "b"
]
edge
[
source 1
target 2
value 0.6153846153846154
]
]
注意
  1. 缩进无影响
  2. 需为gml文件
附一段格式生成傻瓜代码
之所以说是傻瓜代码,因为还得将data.txt中全选copy到data.gml中(小声bb)
#GN_weight
with open('data.txt','w') as f:    #设置文件对象
f.write("graph\n")
f.write("[\n")
for i in range(len(vertex)):
f.write("node\n")
f.write("[\n")
f.write("id "+str(vertexxuhao[vertex[i]])+"\n")
f.write("label \""+vertex[i]+"\"\n")
f.write("]\n")
for i in range(len(edges)):
f.write("edge\n")
f.write("[\n")
f.write("source "+str(edgesxuhao[edges[i][0]])+"\n")
f.write("target "+str(edgesxuhao[edges[i][1]])+"\n")
f.write("value "+str(edgesvalue[(edges[i][0],edges[i][1])])+"\n")
f.write("]\n")
f.write("]")

其中vertex、edges分别为节点集和边集,vertexxuhao、edgesxuhao分别为对应序号字典,edgesvalue为每条边对应权值字典

  • 点赞 3
  • 收藏
  • 分享
  • 文章举报
TSATosia 发布了7 篇原创文章 · 获赞 27 · 访问量 1247 私信 关注
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: