
whitebear-coder/Machine_Learning

This repository has not declared an open-source license file (LICENSE). Before using it, check the project description and the upstream dependencies of its code.
knn-demo.py 3.39 KB
whitebear-coder committed on 2021-10-16 08:58 . knn_demo
'''
author: white mai
program: knn demo
date: 2021,9,24
'''
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from numpy import tile
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist
# Data initialisation
train_spt = 0.8  # fraction of samples used for training

# Load and shuffle the iris dataset
def load_data():
    iris = load_iris()
    data = iris.data
    target = iris.target
    # Shuffle features and labels together so they stay aligned
    data = np.concatenate([data, target.reshape(-1, 1)], axis=1)
    np.random.shuffle(data)
    target = data[:, 4]
    data = data[:, 0:4]
    return data, target

# Split the dataset into training and test sets
def train_test_split(datasets, train_spt):
    train_size = int(len(datasets) * train_spt)
    return datasets[:train_size], datasets[train_size:]

# KNN: distance computation
# `metric` selects the distance measure: 1 = Euclidean, 2 = Manhattan,
# 3 = Minkowski, 4 = cosine
def distance(train_data, test_data, metric):
    # Euclidean distance
    if metric == 1:
        test_data = tile(test_data, train_data.shape[0]).reshape(-1, train_data.shape[1])
        dist = ((train_data - test_data)**2).sum(axis=1)**0.5
        return dist
    # Manhattan distance
    elif metric == 2:
        test_data = tile(test_data, train_data.shape[0]).reshape(-1, train_data.shape[1])
        dist = (abs(train_data - test_data)).sum(axis=1)
        return dist
    # Minkowski distance (here with p = number of features)
    elif metric == 3:
        p = train_data.shape[1]
        test_data = tile(test_data, train_data.shape[0]).reshape(-1, train_data.shape[1])
        dist = ((abs(train_data - test_data))**p).sum(axis=1) ** (1 / p)
        return dist
    # Cosine distance
    elif metric == 4:
        test_data = tile(test_data, train_data.shape[0]).reshape(-1, train_data.shape[1])
        distes = np.array([])
        for i in range(test_data.shape[0]):
            Vec = np.vstack([train_data[i], test_data[i]])
            # pdist with 'cosine' already returns 1 - cosine similarity,
            # so smaller values mean more similar vectors
            dist = pdist(Vec, 'cosine')
            distes = np.append(distes, dist)
        return distes
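
# Reference formulas for the metrics above (added note; p = number of features
# in the Minkowski branch, matching the code):
#   Euclidean:  d(x, y) = sqrt(sum_i (x_i - y_i)^2)
#   Manhattan:  d(x, y) = sum_i |x_i - y_i|
#   Minkowski:  d(x, y) = (sum_i |x_i - y_i|^p)^(1/p)
#   Cosine:     d(x, y) = 1 - (x . y) / (||x|| * ||y||)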

# Majority vote over the k nearest labels,
# e.g. decide([0., 1., 1.]) returns 1. (the most common label)
def decide(data):
    data = list(data)
    return max(set(data), key=data.count)

def knn(train_data, test_data, train_label, test_label, k):
    loss = 0
    dist = 0
    for i in range(test_data.shape[0]):
        # Manhattan distance (metric 2) from the i-th test sample to every training sample
        dist = distance(train_data, test_data[i, :], 2)
        # Pair each distance with its training label, then sort rows by distance
        dist = np.concatenate([dist.reshape(-1, 1), train_label.reshape(-1, 1)], axis=1)
        dist = dist[np.argsort(dist, 0)[:, 0], :]
        # Vote among the k nearest neighbours
        ans_label = decide(dist[0:k, 1])
        if ans_label != test_label[i]:
            loss += 1
    # loss is the misclassification rate on the test set
    loss = loss / test_label.shape[0]
    return dist, loss

if __name__ == '__main__':
    # Preprocessing
    data, target = load_data()
    data_train, data_test = train_test_split(data, train_spt)
    label_train, label_test = train_test_split(target, train_spt)
    # Min-max normalisation to [0, 1]
    sc = MinMaxScaler(feature_range=(0, 1))
    data_train_scaled = sc.fit_transform(data_train)
    data_test_scaled = sc.transform(data_test)
    losses = np.array([])
    for k in range(1, 50):
        C, loss = knn(data_train_scaled, data_test_scaled, label_train, label_test, k)
        print(loss)
        losses = np.append(losses, loss)
    ks = np.array([i for i in range(1, 50)])
    plt.title('the relationship between k and loss')
    plt.xlabel('k')
    plt.ylabel('loss')
    plt.scatter(ks, losses)
    plt.show()
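
# Optional sanity check (a minimal sketch, not part of the original script):
# compare the manual KNN's error rate with scikit-learn's KNeighborsClassifier
# on the same scaled split, using metric='manhattan' to mirror the metric=2
# branch hard-coded inside knn().
#
#     from sklearn.neighbors import KNeighborsClassifier
#     for k in (1, 3, 5, 15):
#         clf = KNeighborsClassifier(n_neighbors=k, metric='manhattan')
#         clf.fit(data_train_scaled, label_train)
#         print(k, 1.0 - clf.score(data_test_scaled, label_test))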