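# NOTE: stray web-page banner captured with this file ("Code pull complete; the page will refresh automatically"). It is not part of the script.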
import pandas as pd
from scipy.spatial.distance import pdist, squareform
from sklearn.manifold import MDS
import matplotlib.pyplot as plt
import networkx as nx
import community as community_louvain
# Load combined_sum_results.csv; the '分类名' column becomes the row index.
# The original category names are kept as-is (cluster names are not shortened).
cluster_df = pd.read_csv(
    'combined_sum_results.csv',
    index_col='分类名',
)

# 2. Multidimensional scaling (MDS)
# a. Pairwise Euclidean distances between rows: pdist yields the condensed
#    vector, squareform expands it into the full symmetric matrix.
distance_matrix = pdist(cluster_df.to_numpy(), 'euclidean')
distance_matrix_square = squareform(distance_matrix)

# b. Fit a 2-component MDS on the precomputed distances (seeded with 42).
mds = MDS(random_state=42, dissimilarity="precomputed", n_components=2)
mds_fit = mds.fit_transform(distance_matrix_square)

# Keep the embedded coordinates in a DataFrame that shares cluster_df's index.
mds_df = pd.DataFrame(
    data=mds_fit,
    columns=['Dim1', 'Dim2'],
    index=cluster_df.index,
)
# c. Visualize the MDS result as a 2D scatter plot
plt.figure(figsize=(8, 6))
plt.scatter(mds_df['Dim1'], mds_df['Dim2'])
# Annotate every point with its cluster label.  The rows are iterated
# directly instead of writing mds_df['Dim1'][i]: the Series is indexed by
# cluster label, so an integer key relies on pandas' deprecated
# positional fallback (and would be a label lookup if the labels were ints).
# NOTE(review): labels come from the '分类名' column; if they contain CJK
# characters the default Matplotlib font may not render them - TODO confirm.
for label, x, y in zip(mds_df.index, mds_df['Dim1'], mds_df['Dim2']):
    plt.text(x, y, label)
plt.title('MDS 2D Visualization of Clusters')
plt.xlabel('Dim1')
plt.ylabel('Dim2')
plt.show()
# 3. Network analysis
# a. Build a complete weighted graph from the distance matrix
G = nx.Graph()
# One node per cluster label.
G.add_nodes_from(mds_df.index)
# Edge weight is the inverse distance, so closer clusters are tied more
# strongly.  Two identical rows have distance 0, and 1/0 on the NumPy value
# would produce an infinite weight that breaks the layout and community
# steps.  Zero distances are therefore clamped to the smallest positive
# distance in the matrix (or 1.0 when every distance is zero), i.e. identical
# clusters get the same weight as the closest distinct pair.
positive_distances = distance_matrix_square[distance_matrix_square > 0]
min_distance = positive_distances.min() if positive_distances.size else 1.0
# Only the upper triangle (i < j) is visited: the graph is undirected.
for i in range(len(cluster_df)):
    for j in range(i + 1, len(cluster_df)):
        distance = max(distance_matrix_square[i, j], min_distance)
        G.add_edge(mds_df.index[i], mds_df.index[j], weight=1 / distance)
# b. Visualize the network
# The spring layout starts from random positions; seed it (same value the
# MDS step uses) so the figure is reproducible from run to run.
pos = nx.spring_layout(G, seed=42)
plt.figure(figsize=(10, 8))
# Draw nodes, then edges whose line width equals the edge weight, then labels.
nx.draw_networkx_nodes(G, pos, node_size=700)
edge_widths = [weight for _, _, weight in G.edges(data='weight')]
nx.draw_networkx_edges(G, pos, width=edge_widths)
nx.draw_networkx_labels(G, pos)
plt.title('Network Visualization of Clusters')
plt.show()
# c. Community detection
# Run the Louvain algorithm on G.  Louvain is randomised, so the seed is
# fixed (same value the MDS step uses) to keep the partition, and therefore
# the colours below, stable between runs.
partition = community_louvain.best_partition(G, random_state=42)
# Colour each node by the community id assigned to it, in G's node order.
colors = [partition[node] for node in G.nodes()]
plt.figure(figsize=(10, 8))
# Reuse the existing layout `pos` so nodes sit where the previous figure put them.
nx.draw_networkx(G, pos, node_color=colors, with_labels=True, node_size=700, cmap=plt.cm.jet)
plt.title('Louvain Community Detection')
plt.show()
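# NOTE: stray content-moderation notice from the hosting web page, captured with this file; it is not part of the script. Translation: "This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing functions."
# "If you confirm that the content does not involve inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false information / worthless content, or content that violates national laws and regulations, you can click submit to appeal and we will handle it as soon as possible."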