"""One-vs-rest logistic regression for the scikit-learn digits dataset.

Reconstructed from a git format-patch (commit 4856fb5f) whose line breaks
had been collapsed; the diff envelope and ``+`` markers are dropped so the
file is importable Python again.
"""

import numpy as np
from numpy.ma.core import set_fill_value  # noqa: F401  (unused; kept from the original)

try:
    import matplotlib.pyplot as plt  # noqa: F401  (unused; kept from the original)
except ImportError:  # nothing below plots, so matplotlib is optional
    plt = None

try:
    from sklearn.datasets import load_digits
except ImportError:  # only needed when LogReg is built without a dataset
    load_digits = None


class LogReg:
    """L2-regularised one-vs-rest logistic regression trained by gradient descent.

    The first 90% of the samples are used for training and the rest for
    testing. One weight row per class is kept in ``theta``; there is no
    intercept term.

    Args:
        alpha: Gradient-descent step size.
        lam: L2 regularisation strength.
        iter: Number of gradient steps per class (the name shadows the
            builtin but is kept for compatibility with existing callers).
        digits: Optional dataset object exposing ``data`` (2-D array),
            ``target`` (integer labels starting at 0) and, optionally,
            ``images``. Defaults to ``sklearn.datasets.load_digits()``.

    Raises:
        ImportError: If no dataset is given and scikit-learn is missing.
        ValueError: If the dataset is too small to yield a training sample.
    """

    def __init__(self, alpha, lam, iter, digits=None):
        if digits is None:
            if load_digits is None:
                raise ImportError(
                    "scikit-learn is required to load the default digits dataset"
                )
            digits = load_digits()

        data = np.asarray(digits.data, dtype=np.float64)
        target = np.asarray(digits.target)

        self.image = getattr(digits, "images", None)
        self.train_size = int(data.shape[0] * 0.9)
        if self.train_size == 0:
            raise ValueError("dataset is too small to build a training split")

        self.train = data[:self.train_size]
        self.test = data[self.train_size:]
        self.target = target[:self.train_size]
        self.test_target = target[self.train_size:]

        # One weight row per class, one column per feature
        # ((10, 64) for the default digits dataset).
        n_classes = int(target.max()) + 1
        self.theta = np.zeros((n_classes, data.shape[1]))
        self.alpha = alpha
        self.lam = lam
        self.iter = iter
        self._preprocessed = False

    def sigmoid(self, x):
        """Return the logistic function of ``x`` element-wise.

        Computed as ``exp(-log(1 + exp(-x)))`` via ``logaddexp`` so that
        large negative inputs do not overflow ``exp``.
        """
        return np.exp(-np.logaddexp(0.0, -np.asarray(x, dtype=np.float64)))

    def OneHot(self, data):
        """Return a one-hot vector (length = number of classes) for label ``data``."""
        encoded = np.zeros((self.theta.shape[0],))
        encoded[data] = 1
        return encoded

    @staticmethod
    def _scale_rows(rows):
        """Min-max scale every row to [0, 1]; constant rows become all zeros."""
        rows = np.asarray(rows, dtype=np.float64)
        if rows.size == 0:
            return rows
        low = rows.min(axis=1, keepdims=True)
        span = rows.max(axis=1, keepdims=True) - low
        span[span == 0] = 1.0  # avoid 0/0 -> NaN for constant rows
        return (rows - low) / span

    def preprocessing(self):
        """Normalise train/test samples and one-hot encode the training labels.

        Safe to call more than once: only the first call has any effect.
        """
        if self._preprocessed:
            return
        self.train = self._scale_rows(self.train)
        self.test = self._scale_rows(self.test)
        # Row k of the identity matrix is the one-hot vector for label k.
        labels = np.asarray(self.target).astype(np.intp)
        self.target = np.eye(self.theta.shape[0], dtype=np.float64)[labels]
        self._preprocessed = True

    def Costfunction(self, i):
        """Return ``(cost, grad)`` of the regularised log-loss for class ``i``.

        Args:
            i: Index of the class whose one-vs-rest classifier is evaluated.

        Returns:
            ``cost`` as a (1, 1) array and ``grad`` as an (n_features, 1) array.
        """
        self.preprocessing()
        m = self.train_size
        theta = self.theta[i, :].reshape(-1, 1)
        label = self.target[:, i].reshape(-1, 1)
        data = self.train

        h = self.sigmoid(data.dot(theta))

        # Cost: keep h strictly inside (0, 1) for the logarithms only, so a
        # saturated prediction yields a large finite cost instead of NaN.
        eps = np.finfo(np.float64).eps
        h_safe = np.clip(h, eps, 1.0 - eps)
        log_loss = label.T.dot(np.log(h_safe)) + (1.0 - label).T.dot(
            np.log(1.0 - h_safe)
        )
        cost = (-1.0 / m) * log_loss + (self.lam / (2.0 * m)) * theta.T.dot(theta)

        # Gradient of the cost above (uses the unclipped h).
        grad = (1.0 / m) * data.T.dot(h - label) + (self.lam / m) * theta
        return cost, grad

    def regression(self):
        """Train every class with ``self.iter`` gradient steps and return ``theta``.

        Prints the last evaluated cost of each class.
        """
        for k in range(self.theta.shape[0]):
            J = None
            for _ in range(self.iter):
                J, grad = self.Costfunction(k)
                self.theta[k, :] -= self.alpha * grad.ravel()
            # NOTE(review): the original indentation was lost; reporting the
            # cost once per class is assumed here - confirm.
            if J is not None:
                print(J)
        return self.theta

    def predict(self):
        """Print and return the test-set accuracy in percent.

        Each sample is assigned the class with the highest score. The
        sigmoid is monotonic, so taking the argmax of the raw scores picks
        the same class while avoiding ties between saturated probabilities.
        """
        self.preprocessing()
        pred = self.test.dot(self.theta.T).argmax(axis=1)
        total = len(self.test_target)
        correct = int(np.count_nonzero(pred == self.test_target))
        accuracy = float(correct) / float(total) * 100.0
        print(accuracy, "%")
        return accuracy


def main():
    """Train on the digits dataset and report test accuracy."""
    model = LogReg(1, 0.1, 3000)
    model.preprocessing()
    model.regression()
    model.predict()


if __name__ == "__main__":
    main()