1 Star 0 Fork 11

木子王大可/聚类算法

forked from 张亚飞/聚类算法 
加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
sklearn_AP.py 3.25 KB
一键复制 编辑 原始数据 按行查看 历史
张亚飞 提交于 2019-04-12 21:36 . first
# -*- coding: utf-8 -*-
"""
@Datetime: 2019/3/31
@Author: Zhang Yafei
"""
# https://www.cnblogs.com/lc1217/p/6908031.html
import functools
import time

import numpy as np
import pandas as pd
from sklearn.cluster import AffinityPropagation
from sklearn.metrics import euclidean_distances, silhouette_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# `sklearn.datasets.samples_generator` was removed in scikit-learn 0.24;
# prefer the public import path and keep the legacy one as a fallback.
try:
    from sklearn.datasets import make_blobs
except ImportError:
    from sklearn.datasets.samples_generator import make_blobs
def timeit(fun):
    """Decorator that prints how long each call to ``fun`` took.

    The elapsed time (in seconds, six decimals) is printed after the wrapped
    function returns; the wrapped function's return value is passed through
    unchanged. Nothing is printed if ``fun`` raises.

    Args:
        fun: The callable to time.

    Returns:
        A wrapper with the same name/docstring as ``fun``.
    """
    @functools.wraps(fun)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic and high-resolution, so the measured
        # interval cannot go negative or jump when the system clock changes.
        start_time = time.perf_counter()
        res = fun(*args, **kwargs)
        print('运行时间为%.6f' % (time.perf_counter() - start_time))
        return res
    return wrapper
def init_sample(n_samples=150, centers=None, cluster_std=0.5, random_state=0):
    """Generate a synthetic clustered data set with ``make_blobs``.

    With the default arguments this produces 150 two-dimensional points
    drawn around the three centres ``[1, 1]``, ``[-1, -1]`` and ``[1, -1]``.

    Args:
        n_samples: Total number of points to generate.
        centers: Cluster centres passed to ``make_blobs``; ``None`` selects
            the three default 2-D centres listed above.
        cluster_std: Standard deviation of each cluster.
        random_state: Seed forwarded to ``make_blobs`` for reproducibility.

    Returns:
        A tuple ``(X, label_true)``: the generated samples and the true
        cluster label of each sample, as returned by ``make_blobs``.
    """
    if centers is None:
        # Built per call so the default list is never shared between calls.
        centers = [[1, 1], [-1, -1], [1, -1]]
    X, label_true = make_blobs(
        n_samples=n_samples,
        centers=centers,
        cluster_std=cluster_std,
        random_state=random_state,
    )
    return X, label_true
def simi_matrix(Xn):
    """Return the pairwise Euclidean distance matrix between the rows of ``Xn``.

    Entry ``[i, j]`` is the Euclidean distance between sample ``i`` and
    sample ``j`` computed over all features. The values are plain
    (non-negative) distances; negate the result if a "larger is more
    similar" matrix is required.

    Args:
        Xn: 2-D array-like of shape ``(n_samples, n_features)``. An empty
            sequence yields an empty ``(0, 0)`` matrix.

    Returns:
        ``numpy.ndarray`` of shape ``(n_samples, n_samples)``, rounded to
        8 decimal places.

    Raises:
        ValueError: If ``Xn`` is non-empty and not two-dimensional.
    """
    points = np.asarray(Xn, dtype=float)
    if points.ndim == 1 and points.size == 0:
        # `[]` carries no feature axis; treat it as zero samples.
        points = points.reshape(0, 0)
    if points.ndim != 2:
        raise ValueError(
            'Xn must be 2-D (n_samples, n_features), got %d-D input' % points.ndim
        )
    # Broadcast to every (i, j) pair at once instead of a nested Python loop.
    deltas = points[:, np.newaxis, :] - points[np.newaxis, :, :]
    distances = np.sqrt(np.sum(deltas ** 2, axis=-1))
    return np.around(distances, decimals=8)
@timeit
def main():
    """Run the two-feature Affinity Propagation demo on synthetic samples.

    Prints, in order: the pairwise Euclidean distance matrix of the samples,
    the cluster label assigned to each sample, and the coordinates of every
    exemplar (cluster centre) found by the model.
    """
    samples, _ = init_sample()
    print(euclidean_distances(samples, samples))

    # Preference values tried by the original author and the number of
    # centres they reported:
    #   -50                      -> 3 centres
    #   minimum of the matrix    -> 9 centres
    #   median of the matrix     -> 13 centres
    preference = -50
    model = AffinityPropagation(
        damping=0.5,
        max_iter=500,
        convergence_iter=30,
        preference=preference,
    )
    model.fit(samples)

    print(model.labels_)
    for exemplar_index in model.cluster_centers_indices_:
        print(samples[exemplar_index])
if __name__ == '__main__':
    # main()  # uncomment to run the two-feature synthetic demo instead

    # Multi-feature example.
    # 1. Load the data set (space-separated text file).
    beer = pd.read_csv('data.txt', sep=' ')

    # 2. Use every column except 'name' as a feature, then standardise.
    X = beer[beer.columns[beer.columns != 'name']].values
    X = StandardScaler().fit_transform(X)

    # 3. Grid-search the AP `preference` and keep the silhouette score of
    #    every candidate that produces a scorable clustering.
    #    (The previously computed cosine similarity matrix was never used and
    #    has been dropped; AP is fitted directly on X.)
    scores = {}
    models = {}
    for p in range(-20, -10):
        ap = AffinityPropagation(damping=0.5, max_iter=500, convergence_iter=30, preference=p).fit(X)
        labels = ap.labels_
        n_clusters = len(set(labels))
        print(n_clusters)
        # silhouette_score requires 2 <= n_labels <= n_samples - 1 and raises
        # ValueError otherwise (e.g. when AP collapses to a single cluster).
        if not 2 <= n_clusters <= len(X) - 1:
            continue
        scores[p] = silhouette_score(X, labels)
        models[p] = ap

    if not scores:
        raise SystemExit('No preference in range(-20, -10) produced a scorable clustering')

    # (preference, score) pair with the highest silhouette score; on ties the
    # first candidate tried wins.
    best_p = max(scores.items(), key=lambda item: item[1])
    print(best_p)

    # 4. Report the winning clustering. The already-fitted model is reused so
    #    the reported clusters are exactly the ones that were scored.
    ap = models[best_p[0]]
    print(len(set(ap.labels_)))
    cluster_centers_indices = ap.cluster_centers_indices_
    for idx in cluster_centers_indices:
        print(X[idx])
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/muziwangdake/clustering_algorithm.git
git@gitee.com:muziwangdake/clustering_algorithm.git
muziwangdake
clustering_algorithm
聚类算法
master

搜索帮助