# 代码拉取完成,页面将自动刷新
# 同步操作将从 张亚飞/聚类算法 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
# 确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
# -*- coding: utf-8 -*-
"""
@Datetime: 2019/3/31
@Author: Zhang Yafei
"""
# https://www.cnblogs.com/lc1217/p/6908031.html
import functools
import time
import pandas as pd
import numpy as np
from sklearn.cluster import AffinityPropagation
from sklearn.datasets.samples_generator import make_blobs
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import euclidean_distances, silhouette_score
from sklearn.preprocessing import StandardScaler
def timeit(fun):
    """Decorator that prints how long each call to ``fun`` takes.

    After the wrapped call returns, the elapsed time in seconds is printed
    with six decimal places and the call's return value is passed through
    unchanged. If ``fun`` raises, nothing is printed and the exception
    propagates to the caller.

    Args:
        fun: The callable to time.

    Returns:
        A wrapper that accepts the same arguments as ``fun`` and carries its
        metadata (name, docstring, ...) via ``functools.wraps``.
    """

    @functools.wraps(fun)
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments the way
        # a time.time() difference can.
        start_time = time.perf_counter()
        res = fun(*args, **kwargs)
        print('运行时间为%.6f' % (time.perf_counter() - start_time))
        return res

    return wrapper
def init_sample():
    """Generate the synthetic data set used by the two-feature demo.

    Calls ``make_blobs`` to draw 150 samples around three 2-D centers
    (``[1, 1]``, ``[-1, -1]`` and ``[1, -1]``) with a cluster standard
    deviation of 0.5. ``random_state`` is fixed at 0 so every run produces
    the same data.

    Returns:
        A ``(samples, true_labels)`` pair exactly as produced by
        ``make_blobs``: the generated points and the label of the blob
        each point was drawn from.
    """
    # Ground-truth blob centers; being 2-D, they give every sample two features.
    blob_centers = [[1, 1], [-1, -1], [1, -1]]
    samples, true_labels = make_blobs(
        n_samples=150,
        centers=blob_centers,
        cluster_std=0.5,
        random_state=0,
    )
    return samples, true_labels
def simi_matrix(Xn):
    """Return the pairwise Euclidean distance matrix of the rows of ``Xn``.

    Entry ``[i, j]`` is the Euclidean distance between row ``i`` and row
    ``j``, rounded to 8 decimal places. All columns take part in the
    distance, so the function works for any number of features (for
    two-feature input the result is the same as summing the squared
    differences of the two coordinates by hand).

    Note:
        The values are plain non-negative distances, not similarities.
        Negate the result if a "larger means more similar" matrix is needed.

    Args:
        Xn: 2-D array-like of shape ``(n_samples, n_features)``.

    Returns:
        ``numpy.ndarray`` of shape ``(n_samples, n_samples)``; an empty
        1-D array when ``Xn`` contains no data.

    Raises:
        ValueError: If ``Xn`` is non-empty but not two-dimensional.
    """
    points = np.asarray(Xn, dtype=float)
    if points.size == 0:
        # Nothing to compare; mirror the empty array a row-by-row build yields.
        return np.array([])
    if points.ndim != 2:
        raise ValueError(
            'Xn must be a 2-D collection of points, got ndim=%d' % points.ndim
        )

    # Broadcasting (n, 1, d) against (1, n, d) yields every row-pair
    # difference at once, replacing the nested Python loops.
    deltas = points[:, np.newaxis, :] - points[np.newaxis, :, :]
    distances = np.sqrt(np.sum(deltas ** 2, axis=-1))
    return np.around(distances, decimals=8)
@timeit
def main():
    """Two-feature demo: cluster the synthetic samples with Affinity Propagation.

    Prints, in order: the pairwise Euclidean distance matrix of the
    samples, the cluster label assigned to each sample, and the
    coordinates of every sample chosen as a cluster center.
    """
    samples, _true_labels = init_sample()

    # The distance matrix is printed for inspection only; the model below is
    # fitted on the raw samples, not on this matrix.
    distance_matrix = euclidean_distances(X=samples, Y=samples)
    print(distance_matrix)

    # Author's notes on the preference value: -50 gives 3 centers.
    # Alternatives that were tried: the minimum of the distance matrix
    # (9 centers) and the median of the similarity matrix (13 centers).
    preference = -50

    model = AffinityPropagation(
        damping=0.5,
        max_iter=500,
        convergence_iter=30,
        preference=preference,
    )
    model.fit(samples)

    print(model.labels_)
    # Each index points at the sample that serves as a cluster center.
    for center_index in model.cluster_centers_indices_:
        print(samples[center_index])
if __name__ == '__main__':
    # Multi-feature demo. Call main() here instead to run the two-feature
    # demo on synthetic data.

    # 1. Load the data set (space-separated text file).
    beer = pd.read_csv('data.txt', sep=' ')

    # 2. Use every column except 'name' as a feature, then standardize.
    X = beer[beer.columns[beer.columns != 'name']].values
    X = StandardScaler().fit_transform(X)

    # 3. Similarity matrix. It is not consumed by the clustering below
    #    (the model is fitted on X directly); it is kept for the alternative
    #    preference choices the author noted: -10,
    #    np.min(cosine_simi_matrix) (11 centers), np.median(cosine_simi_matrix).
    cosine_simi_matrix = cosine_similarity(X)

    # 4. AP clustering: sweep the preference and keep the one with the
    #    best silhouette score.
    n_samples = len(X)
    scores = {}
    for p in range(-20, -10):
        ap = AffinityPropagation(damping=0.5, max_iter=500, convergence_iter=30, preference=p).fit(X)
        labels = ap.labels_
        n_clusters = len(set(labels))
        print(n_clusters)
        # silhouette_score raises ValueError unless the number of distinct
        # labels is between 2 and n_samples - 1, so a candidate that
        # collapses to one cluster (or gives every sample its own) is
        # skipped instead of aborting the whole sweep.
        if not 1 < n_clusters < n_samples:
            continue
        scores[p] = silhouette_score(X, labels)

    if not scores:
        raise SystemExit('no preference in range(-20, -10) produced a scorable clustering')

    # (preference, score) pair with the highest score; on ties the first
    # preference tried wins, matching a stable descending sort.
    best_p = max(scores.items(), key=lambda item: item[1])
    print(best_p)

    # Final clustering with the selected preference.
    ap = AffinityPropagation(damping=0.5, max_iter=500, convergence_iter=30, preference=best_p[0]).fit(X)
    print(len(set(ap.labels_)))
    cluster_centers_indices = ap.cluster_centers_indices_
    for idx in cluster_centers_indices:
        print(X[idx])
# 此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
# 如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。